diff options
author | Aarni Koskela <akx@iki.fi> | 2018-05-28 13:12:29 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-05-28 13:12:29 +0300 |
commit | 2d0179e9283bf23a4a3bdc402dad30df2b1bcd7a (patch) | |
tree | c06dca648ac95e59b6c1e11d4bc54ca6b7629738 | |
parent | a72cdf171a9e656ef7d8f31ebb3216c0b4e96556 (diff) | |
parent | 77dc9d4024b78c2339f7cf3bff1a2e8be8e2d0f7 (diff) | |
download | babel-2d0179e9283bf23a4a3bdc402dad30df2b1bcd7a.tar.gz |
Merge pull request #579 from python-babel/number-sys-import
import_cldr: ignore non-Latin numbering systems
-rwxr-xr-x | scripts/import_cldr.py | 157 |
1 file changed, 101 insertions, 56 deletions
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index cd0ec37..60aa6c2 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -389,6 +389,8 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False): territory != '001' and territory or None ])) + data['locale_id'] = locale_id + if locale_id in plural_rules: data['plural_form'] = plural_rules[locale_id] if locale_id in ordinal_rules: @@ -430,6 +432,31 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False): write_datafile(data_filename, data, dump_json=dump_json) +def _should_skip_number_elem(data, elem): + """ + Figure out whether the numbering-containing element `elem` is in a currently + non-supported (i.e. currently non-Latin) numbering system. + + If it is, a warning is raised. + + :param data: The root data element, for formatting the warning. + :param elem: Element with `numberSystem` key + :return: Boolean + """ + number_system = elem.get('numberSystem', 'latn') + + if number_system != 'latn': + log('%s: Unsupported number system "%s" in <%s numberSystem="%s">' % ( + data['locale_id'], + number_system, + elem.tag, + number_system, + )) + return True + + return False + + def _should_skip_elem(elem, type=None, dest=None): """ Check whether the given element should be skipped. 
@@ -701,59 +728,73 @@ def parse_calendar_datetime_skeletons(data, calendar): def parse_number_symbols(data, tree): number_symbols = data.setdefault('number_symbols', {}) - for elem in tree.findall('.//numbers/symbols/*'): - if _should_skip_elem(elem): + for symbol_elem in tree.findall('.//numbers/symbols'): + if _should_skip_number_elem(data, symbol_elem): # TODO: Support other number systems continue - number_symbols[elem.tag] = text_type(elem.text) + + for elem in symbol_elem.findall('./*'): + if _should_skip_elem(elem): + continue + number_symbols[elem.tag] = text_type(elem.text) def parse_decimal_formats(data, tree): decimal_formats = data.setdefault('decimal_formats', {}) - for elem in tree.findall('.//decimalFormats/decimalFormatLength'): - length_type = elem.attrib.get('type') - if _should_skip_elem(elem, length_type, decimal_formats): + for df_elem in tree.findall('.//decimalFormats'): + if _should_skip_number_elem(data, df_elem): # TODO: Support other number systems continue - if elem.findall('./alias'): - # TODO map the alias to its target - continue - for pattern_el in elem.findall('./decimalFormat/pattern'): - pattern_type = pattern_el.attrib.get('type') - pattern = numbers.parse_pattern(text_type(pattern_el.text)) - if pattern_type: - # This is a compact decimal format, see: - # http://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats - - # These are mapped into a `compact_decimal_formats` dictionary - # with the format {length: {count: {multiplier: pattern}}}. - - # TODO: Add support for formatting them. - compact_decimal_formats = data.setdefault('compact_decimal_formats', {}) - length_map = compact_decimal_formats.setdefault(length_type, {}) - length_count_map = length_map.setdefault(pattern_el.attrib['count'], {}) - length_count_map[pattern_type] = pattern - else: - # Regular decimal format. 
- decimal_formats[length_type] = pattern + for elem in df_elem.findall('./decimalFormatLength'): + length_type = elem.attrib.get('type') + if _should_skip_elem(elem, length_type, decimal_formats): + continue + if elem.findall('./alias'): + # TODO map the alias to its target + continue + for pattern_el in elem.findall('./decimalFormat/pattern'): + pattern_type = pattern_el.attrib.get('type') + pattern = numbers.parse_pattern(text_type(pattern_el.text)) + if pattern_type: + # This is a compact decimal format, see: + # http://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats + + # These are mapped into a `compact_decimal_formats` dictionary + # with the format {length: {count: {multiplier: pattern}}}. + + # TODO: Add support for formatting them. + compact_decimal_formats = data.setdefault('compact_decimal_formats', {}) + length_map = compact_decimal_formats.setdefault(length_type, {}) + length_count_map = length_map.setdefault(pattern_el.attrib['count'], {}) + length_count_map[pattern_type] = pattern + else: + # Regular decimal format. 
+ decimal_formats[length_type] = pattern def parse_scientific_formats(data, tree): scientific_formats = data.setdefault('scientific_formats', {}) - for elem in tree.findall('.//scientificFormats/scientificFormatLength'): - type = elem.attrib.get('type') - if _should_skip_elem(elem, type, scientific_formats): + for sf_elem in tree.findall('.//scientificFormats'): + if _should_skip_number_elem(data, sf_elem): # TODO: Support other number systems continue - pattern = text_type(elem.findtext('scientificFormat/pattern')) - scientific_formats[type] = numbers.parse_pattern(pattern) + for elem in sf_elem.findall('./scientificFormatLength'): + type = elem.attrib.get('type') + if _should_skip_elem(elem, type, scientific_formats): + continue + pattern = text_type(elem.findtext('scientificFormat/pattern')) + scientific_formats[type] = numbers.parse_pattern(pattern) def parse_percent_formats(data, tree): percent_formats = data.setdefault('percent_formats', {}) - for elem in tree.findall('.//percentFormats/percentFormatLength'): - type = elem.attrib.get('type') - if _should_skip_elem(elem, type, percent_formats): + + for pf_elem in tree.findall('.//percentFormats'): + if _should_skip_number_elem(data, pf_elem): # TODO: Support other number systems continue - pattern = text_type(elem.findtext('percentFormat/pattern')) - percent_formats[type] = numbers.parse_pattern(pattern) + for elem in pf_elem.findall('.//percentFormatLength'): + type = elem.attrib.get('type') + if _should_skip_elem(elem, type, percent_formats): + continue + pattern = text_type(elem.findtext('percentFormat/pattern')) + percent_formats[type] = numbers.parse_pattern(pattern) def parse_currency_names(data, tree): @@ -837,25 +878,29 @@ def parse_interval_formats(data, tree): def parse_currency_formats(data, tree): currency_formats = data.setdefault('currency_formats', {}) - for length_elem in tree.findall('.//currencyFormats/currencyFormatLength'): - curr_length_type = length_elem.attrib.get('type') - for elem in 
length_elem.findall('currencyFormat'): - type = elem.attrib.get('type') - if curr_length_type: - # Handle `<currencyFormatLength type="short">`, etc. - # TODO(3.x): use nested dicts instead of colon-separated madness - type = '%s:%s' % (type, curr_length_type) - if _should_skip_elem(elem, type, currency_formats): - continue - for child in elem.getiterator(): - if child.tag == 'alias': - currency_formats[type] = Alias( - _translate_alias(['currency_formats', elem.attrib['type']], - child.attrib['path']) - ) - elif child.tag == 'pattern': - pattern = text_type(child.text) - currency_formats[type] = numbers.parse_pattern(pattern) + for currency_format in tree.findall('.//currencyFormats'): + if _should_skip_number_elem(data, currency_format): # TODO: Support other number systems + continue + + for length_elem in currency_format.findall('./currencyFormatLength'): + curr_length_type = length_elem.attrib.get('type') + for elem in length_elem.findall('currencyFormat'): + type = elem.attrib.get('type') + if curr_length_type: + # Handle `<currencyFormatLength type="short">`, etc. + # TODO(3.x): use nested dicts instead of colon-separated madness + type = '%s:%s' % (type, curr_length_type) + if _should_skip_elem(elem, type, currency_formats): + continue + for child in elem.getiterator(): + if child.tag == 'alias': + currency_formats[type] = Alias( + _translate_alias(['currency_formats', elem.attrib['type']], + child.attrib['path']) + ) + elif child.tag == 'pattern': + pattern = text_type(child.text) + currency_formats[type] = numbers.parse_pattern(pattern) def parse_day_period_rules(tree): |