diff options
author | Armin Ronacher <armin.ronacher@active-4.com> | 2013-07-04 18:19:51 +0200 |
---|---|---|
committer | Armin Ronacher <armin.ronacher@active-4.com> | 2013-07-04 18:19:51 +0200 |
commit | e8168071afb3f68e1e366073ca73b10a2e2f0b93 (patch) | |
tree | e028f726887b3348ea18481842aacfeca2005770 | |
parent | 874e39cb47cbba9b458b7bfe28fd4a4fc4d91844 (diff) | |
download | babel-e8168071afb3f68e1e366073ca73b10a2e2f0b93.tar.gz |
Made babel work with the latest version of the CLDR (23)
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | babel/global.dat | bin | 15987 -> 16221 bytes | |||
-rw-r--r-- | babel/plural.py | 92 | ||||
-rwxr-xr-x | scripts/download_import_cldr.py | 8 | ||||
-rwxr-xr-x | scripts/import_cldr.py | 57 |
5 files changed, 104 insertions, 55 deletions
@@ -1,5 +1,5 @@ *.so -doc/_build +docs/_build *.pyc *.pyo *.egg-info diff --git a/babel/global.dat b/babel/global.dat Binary files differindex 4eb6099..8ddf2b9 100644 --- a/babel/global.dat +++ b/babel/global.dat diff --git a/babel/plural.py b/babel/plural.py index 6ac4d05..378d81f 100644 --- a/babel/plural.py +++ b/babel/plural.py @@ -148,14 +148,19 @@ def to_python(rule): 'one' >>> func(3) 'few' + >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'}) + >>> func(11) + 'one' + >>> func(15) + 'few' :param rule: the rules as list or dict, or a `PluralRule` object :return: a corresponding Python function :raise RuleError: if the expression is malformed """ namespace = { - 'IN': in_range, - 'WITHIN': within_range, + 'IN': in_range_list, + 'WITHIN': within_range_list, 'MOD': cldr_modulo } to_python = _PythonCompiler().compile @@ -191,36 +196,44 @@ def to_gettext(rule): return ''.join(result) -def in_range(num, min, max): - """Integer range test. This is the callback for the "in" operator +def in_range_list(num, range_list): + """Integer range list test. This is the callback for the "in" operator of the UTS #35 pluralization rule language: - >>> in_range(1, 1, 3) + >>> in_range_list(1, [(1, 3)]) + True + >>> in_range_list(3, [(1, 3)]) True - >>> in_range(3, 1, 3) + >>> in_range_list(3, [(1, 3), (5, 8)]) True - >>> in_range(1.2, 1, 4) + >>> in_range_list(1.2, [(1, 4)]) False - >>> in_range(10, 1, 4) + >>> in_range_list(10, [(1, 4)]) + False + >>> in_range_list(10, [(1, 4), (6, 8)]) False """ - return num == int(num) and within_range(num, min, max) + return num == int(num) and within_range_list(num, range_list) -def within_range(num, min, max): +def within_range_list(num, range_list): """Float range test. This is the callback for the "within" operator of the UTS #35 pluralization rule language: - >>> within_range(1, 1, 3) + >>> within_range_list(1, [(1, 3)]) + True + >>> within_range_list(1.0, [(1, 3)]) True - >>> within_range(1.0, 1, 3) + >>> within_range_list(1.2, [(1, 4)]) True - >>> within_range(1.2, 1, 4) + >>> within_range_list(8.8, [(1, 4), (7, 15)]) True - >>> within_range(10, 1, 4) + >>> within_range_list(10, [(1, 4)]) + False + >>> within_range_list(10.5, [(1, 4), (20, 30)]) False """ - return num >= min and num <= max + return any(num >= min_ and num <= max_ for min_, max_ in range_list) def cldr_modulo(a, b): @@ -254,21 +267,24 @@ class _Parser(object): """Internal parser. This class can translate a single rule into an abstract tree of tuples. It implements the following grammar:: - condition = and_condition ('or' and_condition)* + condition = and_condition ('or' and_condition)* and_condition = relation ('and' relation)* - relation = is_relation | in_relation | within_relation | 'n' <EOL> - is_relation = expr 'is' ('not')? value - in_relation = expr ('not')? 'in' range - within_relation = expr ('not')? 'within' range - expr = 'n' ('mod' value)? - value = digit+ - digit = 0|1|2|3|4|5|6|7|8|9 - range = value'..'value + relation = is_relation | in_relation | within_relation | 'n' <EOL> + is_relation = expr 'is' ('not')? value + in_relation = expr ('not')? 'in' range_list + within_relation = expr ('not')? 'within' range_list + expr = 'n' ('mod' value)? + range_list = (range | value) (',' range_list)* + value = digit+ + digit = 0|1|2|3|4|5|6|7|8|9 + range = value'..'value - Whitespace can occur between or around any of the above tokens. - Rules should be mutually exclusive; for a given numeric value, only one rule should apply (i.e. the condition should only be true for one of - the plural rule elements. + the plural rule elements). + - The in and within relations can take comma-separated lists, such as: + 'n in 3,5,7..15'. The translator parses the expression on instanciation into an attribute called `ast`. @@ -278,6 +294,7 @@ class _Parser(object): (None, re.compile(r'\s+(?u)')), ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')), ('value', re.compile(r'\d+')), + ('comma', re.compile(r',')), ('ellipsis', re.compile(r'\.\.')) ] @@ -345,15 +362,23 @@ class _Parser(object): method = 'within' else: self.expect('word', 'in', term="'within' or 'in'") - rv = 'relation', (method, left, self.range()) + rv = 'relation', (method, left, self.range_list()) if negated: rv = 'not', (rv,) return rv - def range(self): + def range_or_value(self): left = self.value() - self.expect('ellipsis') - return 'range', (left, self.value()) + if self.skip('ellipsis'): + return((left, self.value())) + else: + return((left, left)) + + def range_list(self): + range_list = [self.range_or_value()] + while self.skip('comma'): + range_list.append(self.range_or_value()) + return 'range_list', range_list def expr(self): self.expect('word', 'n') @@ -392,9 +417,12 @@ class _Compiler(object): compile_is = _binary_compiler('(%s == %s)') compile_isnot = _binary_compiler('(%s != %s)') - def compile_relation(self, method, expr, range): - range = '%s, %s' % tuple(map(self.compile, range[1])) - return '%s(%s, %s)' % (method.upper(), self.compile(expr), range) + def compile_relation(self, method, expr, range_list): + compile_range_list = '[%s]' % ','.join( + ['(%s, %s)' % tuple(map(self.compile, range_)) + for range_ in range_list[1]]) + return '%s(%s, %s)' % (method.upper(), self.compile(expr), + compile_range_list) class _PythonCompiler(_Compiler): diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py index 27e37ac..da4c1ef 100755 --- a/scripts/download_import_cldr.py +++ b/scripts/download_import_cldr.py @@ -9,9 +9,9 @@ import urllib import subprocess -URL = 'http://unicode.org/Public/cldr/1.9.1/core.zip' -FILENAME = 'core-1.9.1.zip' -FILESUM = '1c506cd7a30bf5b4f3cbb8a5b382d96c' +URL = 'http://unicode.org/Public/cldr/23/core.zip' +FILENAME = 'core-23.zip' +FILESUM = '800373f275df21bb2c569ddee2d05dd5' BLKSIZE = 131072 @@ -72,7 +72,7 @@ def main(): print common_path = os.path.join(cldr_path, 'common') - if changed: + if changed or not os.path.isdir(common_path): if os.path.isdir(common_path): log('Deleting old CLDR checkout in \'%s\'', cldr_path) shutil.rmtree(common_path) diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index b550494..67f8265 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -103,6 +103,9 @@ def main(): '..', 'babel') sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml') + bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml')) + sup_windows_zones = parse(os.path.join(srcdir, 'supplemental', + 'windowsZones.xml')) sup = parse(sup_filename) # Import global data from the supplemental files @@ -112,13 +115,27 @@ def main(): territory_zones = global_data.setdefault('territory_zones', {}) zone_aliases = global_data.setdefault('zone_aliases', {}) zone_territories = global_data.setdefault('zone_territories', {}) - for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'): - tzid = elem.attrib['type'] - territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) - zone_territories[tzid] = elem.attrib['territory'] - if 'aliases' in elem.attrib: - for alias in elem.attrib['aliases'].split(): - zone_aliases[alias] = tzid + + # create auxiliary zone->territory map from the windows zones (we don't set + # the 'zones_territories' map directly here, because there are some zones + # aliases listed and we defer the decision of which ones to choose to the + # 'bcp47' data + _zone_territory_map = {} + for map_zone in sup_windows_zones.findall('.//windowsZones/mapTimezones/mapZone'): + for tzid in map_zone.attrib['type'].split(): + _zone_territory_map[tzid] = map_zone.attrib['territory'] + + for key_elem in bcp47_timezone.findall('.//keyword/key'): + if key_elem.attrib['name'] == 'tz': + for elem in key_elem.findall('type'): + aliases = elem.attrib['alias'].split() + tzid = aliases.pop(0) + territory = _zone_territory_map.get(tzid, '001') + territory_zones.setdefault(territory, []).append(tzid) + zone_territories[tzid] = territory + for alias in aliases: + zone_aliases[alias] = tzid + break # Import Metazone mapping meta_zones = global_data.setdefault('meta_zones', {}) @@ -273,6 +290,11 @@ def main(): zone_formats['fallback'] = unicode(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break + for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'): + if 'draft' not in elem.attrib and 'alt' not in elem.attrib: + zone_formats['fallback_region'] = unicode(elem.text) \ + .replace('{0}', '%(0)s').replace('{1}', '%(1)s') + break time_zones = data.setdefault('time_zones', {}) for elem in tree.findall('.//timeZoneNames/zone'): @@ -380,16 +402,13 @@ def main(): # AM/PM periods = data.setdefault('periods', {}) - for elem in calendar.findall('am'): - if ('draft' in elem.attrib or 'alt' in elem.attrib) \ - and elem.tag in periods: - continue - periods[elem.tag] = unicode(elem.text) - for elem in calendar.findall('pm'): - if ('draft' in elem.attrib or 'alt' in elem.attrib) \ - and elem.tag in periods: - continue - periods[elem.tag] = unicode(elem.text) + for day_period_width in calendar.findall( + 'dayPeriods/dayPeriodContext/dayPeriodWidth'): + if day_period_width.attrib['type'] == 'wide': + for day_period in day_period_width.findall('dayPeriod'): + if 'alt' not in day_period.attrib: + periods[day_period.attrib['type']] = unicode( + day_period.text) date_formats = data.setdefault('date_formats', {}) for format in calendar.findall('dateFormats'): @@ -455,7 +474,9 @@ def main(): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in decimal_formats: continue - pattern = unicode(elem.findtext('decimalFormat/pattern')) + pattern = unicode(elem.findtext('./decimalFormat/pattern')) + if pattern == 'None': + continue decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) scientific_formats = data.setdefault('scientific_formats', {}) |