From 4b6cd34c5c4436c60747a30d15a915cb2702ca0b Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 27 May 2019 14:32:48 +0300 Subject: parse_decimal(): assume spaces are equivalent to non-breaking spaces when not in strict mode Fixes #637 --- babel/numbers.py | 11 +++++++++++ tests/test_numbers.py | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/babel/numbers.py b/babel/numbers.py index e5650dd..3dcd730 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -673,6 +673,8 @@ def parse_decimal(string, locale=LC_NUMERIC, strict=False): Decimal('1099.98') >>> parse_decimal('1.099,98', locale='de') Decimal('1099.98') + >>> parse_decimal('12 345,123', locale='ru') + Decimal('12345.123') When the given string cannot be parsed, an exception is raised: @@ -704,6 +706,15 @@ def parse_decimal(string, locale=LC_NUMERIC, strict=False): locale = Locale.parse(locale) group_symbol = get_group_symbol(locale) decimal_symbol = get_decimal_symbol(locale) + + if not strict and ( + group_symbol == u'\xa0' and # if the grouping symbol is U+00A0 NO-BREAK SPACE, + group_symbol not in string and # and the string to be parsed does not contain it, + ' ' in string # but it does contain a space instead, + ): + # ... it's reasonable to assume it is taking the place of the grouping symbol. + string = string.replace(' ', group_symbol) + try: parsed = decimal.Decimal(string.replace(group_symbol, '') .replace(decimal_symbol, '.')) diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 99aa96b..27649f8 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -679,3 +679,14 @@ def test_numberpattern_repr(): def test_parse_static_pattern(): assert numbers.parse_pattern('Kun') # in the So locale in CLDR 30 # TODO: static patterns might not be correctly `apply()`ed at present + + +def test_parse_decimal_nbsp_heuristics(): + # Re https://github.com/python-babel/babel/issues/637 – + # for locales (of which there are many) that use U+00A0 as the group + # separator in numbers, it's reasonable to assume that input strings + # with plain spaces actually should have U+00A0s instead. + # This heuristic is only applied when strict=False. + n = decimal.Decimal("12345.123") + assert numbers.parse_decimal("12 345.123", locale="fi") == n + assert numbers.parse_decimal(numbers.format_decimal(n, locale="fi"), locale="fi") == n -- cgit v1.2.1