diff options
author | Aarni Koskela <akx@iki.fi> | 2022-01-28 13:50:49 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-28 13:50:49 +0200 |
commit | 9033f020783117b8cc069249d98f8a6060916f8d (patch) | |
tree | 7928109d6af1e2b3735eabe434494ec06f3f90e4 | |
parent | 89686fc4adc8376d7cec72ca822a71a1e39d2a65 (diff) | |
parent | 8a5e4bdb7f40bfbd8f6f0f6f22479e6f0f190da1 (diff) | |
download | babel-9033f020783117b8cc069249d98f8a6060916f8d.tar.gz |
Merge pull request #834 from akx/improve-date-parse
Improve date/time parsing
-rw-r--r-- | babel/dates.py | 63 | ||||
-rw-r--r-- | tests/test_dates.py | 35 |
2 files changed, 73 insertions, 25 deletions
```diff
diff --git a/babel/dates.py b/babel/dates.py
index f94c60a..1323503 100644
--- a/babel/dates.py
+++ b/babel/dates.py
@@ -1138,7 +1138,11 @@ def get_period_id(time, tzinfo=None, type=None, locale=LC_TIME):
         return "pm"
 
 
-def parse_date(string, locale=LC_TIME):
+class ParseError(ValueError):
+    pass
+
+
+def parse_date(string, locale=LC_TIME, format='medium'):
     """Parse a date from a string.
 
     This function uses the date format for the locale as a hint to determine
@@ -1151,14 +1155,19 @@ def parse_date(string, locale=LC_TIME):
 
     :param string: the string containing the date
     :param locale: a `Locale` object or a locale identifier
+    :param format: the format to use (see ``get_date_format``)
     """
+    numbers = re.findall(r'(\d+)', string)
+    if not numbers:
+        raise ParseError("No numbers were found in input")
+
     # TODO: try ISO format first?
-    format = get_date_format(locale=locale).pattern.lower()
-    year_idx = format.index('y')
-    month_idx = format.index('m')
+    format_str = get_date_format(format=format, locale=locale).pattern.lower()
+    year_idx = format_str.index('y')
+    month_idx = format_str.index('m')
     if month_idx < 0:
-        month_idx = format.index('l')
-    day_idx = format.index('d')
+        month_idx = format_str.index('l')
+    day_idx = format_str.index('d')
 
     indexes = [(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')]
     indexes.sort()
@@ -1167,7 +1176,6 @@ def parse_date(string, locale=LC_TIME):
     # FIXME: this currently only supports numbers, but should also support month
     #        names, both in the requested locale, and english
 
-    numbers = re.findall(r'(\d+)', string)
     year = numbers[indexes['Y']]
     if len(year) == 2:
         year = 2000 + int(year)
@@ -1180,7 +1188,7 @@ def parse_date(string, locale=LC_TIME):
     return date(year, month, day)
 
 
-def parse_time(string, locale=LC_TIME):
+def parse_time(string, locale=LC_TIME, format='medium'):
     """Parse a time from a string.
 
     This function uses the time format for the locale as a hint to determine
@@ -1191,29 +1199,42 @@ def parse_time(string, locale=LC_TIME):
 
     :param string: the string containing the time
     :param locale: a `Locale` object or a locale identifier
+    :param format: the format to use (see ``get_time_format``)
     :return: the parsed time
     :rtype: `time`
     """
+    numbers = re.findall(r'(\d+)', string)
+    if not numbers:
+        raise ParseError("No numbers were found in input")
+
     # TODO: try ISO format first?
-    format = get_time_format(locale=locale).pattern.lower()
-    hour_idx = format.index('h')
+    format_str = get_time_format(format=format, locale=locale).pattern.lower()
+    hour_idx = format_str.index('h')
     if hour_idx < 0:
-        hour_idx = format.index('k')
-    min_idx = format.index('m')
-    sec_idx = format.index('s')
+        hour_idx = format_str.index('k')
+    min_idx = format_str.index('m')
+    sec_idx = format_str.index('s')
 
     indexes = [(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')]
     indexes.sort()
     indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)])
 
-    # FIXME: support 12 hour clock, and 0-based hour specification
-    #        and seconds should be optional, maybe minutes too
-    #        oh, and time-zones, of course
-
-    numbers = re.findall(r'(\d+)', string)
-    hour = int(numbers[indexes['H']])
-    minute = int(numbers[indexes['M']])
-    second = int(numbers[indexes['S']])
+    # TODO: support time zones
+
+    # Check if the format specifies a period to be used;
+    # if it does, look for 'pm' to figure out an offset.
+    hour_offset = 0
+    if 'a' in format_str:
+        if 'pm' in string.lower():
+            hour_offset = 12
+
+    # Parse up to three numbers from the string.
+    minute = second = 0
+    hour = int(numbers[indexes['H']]) + hour_offset
+    if len(numbers) > 1:
+        minute = int(numbers[indexes['M']])
+        if len(numbers) > 2:
+            second = int(numbers[indexes['S']])
     return time(hour, minute, second)
 
 
diff --git a/tests/test_dates.py b/tests/test_dates.py
index d11758c..2a883ab 100644
--- a/tests/test_dates.py
+++ b/tests/test_dates.py
@@ -775,10 +775,37 @@ def test_format_timedelta():
 def test_parse_date():
     assert dates.parse_date('4/1/04', locale='en_US') == date(2004, 4, 1)
     assert dates.parse_date('01.04.2004', locale='de_DE') == date(2004, 4, 1)
-
-
-def test_parse_time():
-    assert dates.parse_time('15:30:00', locale='en_US') == time(15, 30)
+    assert dates.parse_date('2004-04-01', locale='sv_SE', format='short') == date(2004, 4, 1)
+
+
+@pytest.mark.parametrize('input, expected', [
+    # base case, fully qualified time
+    ('15:30:00', time(15, 30)),
+    # test digits
+    ('15:30', time(15, 30)),
+    ('3:30', time(3, 30)),
+    ('00:30', time(0, 30)),
+    # test am parsing
+    ('03:30 am', time(3, 30)),
+    ('3:30:21 am', time(3, 30, 21)),
+    ('3:30 am', time(3, 30)),
+    # test pm parsing
+    ('03:30 pm', time(15, 30)),
+    ('03:30 pM', time(15, 30)),
+    ('03:30 Pm', time(15, 30)),
+    ('03:30 PM', time(15, 30)),
+    # test hour-only parsing
+    ('4 pm', time(16, 0)),
+])
+def test_parse_time(input, expected):
+    assert dates.parse_time(input, locale='en_US') == expected
+
+
+@pytest.mark.parametrize('case', ['', 'a', 'aaa'])
+@pytest.mark.parametrize('func', [dates.parse_date, dates.parse_time])
+def test_parse_errors(case, func):
+    with pytest.raises(dates.ParseError):
+        func(case, locale='en_US')
 
 
 def test_datetime_format_get_week_number():
```