summary refs log tree commit diff
diff options
context:
space:
mode:
author Aarni Koskela <akx@iki.fi> 2022-01-28 13:50:49 +0200
committer GitHub <noreply@github.com> 2022-01-28 13:50:49 +0200
commit 9033f020783117b8cc069249d98f8a6060916f8d (patch)
tree 7928109d6af1e2b3735eabe434494ec06f3f90e4
parent 89686fc4adc8376d7cec72ca822a71a1e39d2a65 (diff)
parent 8a5e4bdb7f40bfbd8f6f0f6f22479e6f0f190da1 (diff)
download babel-9033f020783117b8cc069249d98f8a6060916f8d.tar.gz
Merge pull request #834 from akx/improve-date-parse
Improve date/time parsing
-rw-r--r-- babel/dates.py 63
-rw-r--r-- tests/test_dates.py 35
2 files changed, 73 insertions, 25 deletions
diff --git a/babel/dates.py b/babel/dates.py
index f94c60a..1323503 100644
--- a/babel/dates.py
+++ b/babel/dates.py
@@ -1138,7 +1138,11 @@ def get_period_id(time, tzinfo=None, type=None, locale=LC_TIME):
return "pm"
-def parse_date(string, locale=LC_TIME):
+class ParseError(ValueError):
+ pass
+
+
+def parse_date(string, locale=LC_TIME, format='medium'):
"""Parse a date from a string.
This function uses the date format for the locale as a hint to determine
@@ -1151,14 +1155,19 @@ def parse_date(string, locale=LC_TIME):
:param string: the string containing the date
:param locale: a `Locale` object or a locale identifier
+ :param format: the format to use (see ``get_date_format``)
"""
+ numbers = re.findall(r'(\d+)', string)
+ if not numbers:
+ raise ParseError("No numbers were found in input")
+
# TODO: try ISO format first?
- format = get_date_format(locale=locale).pattern.lower()
- year_idx = format.index('y')
- month_idx = format.index('m')
+ format_str = get_date_format(format=format, locale=locale).pattern.lower()
+ year_idx = format_str.index('y')
+ month_idx = format_str.index('m')
if month_idx < 0:
- month_idx = format.index('l')
- day_idx = format.index('d')
+ month_idx = format_str.index('l')
+ day_idx = format_str.index('d')
indexes = [(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')]
indexes.sort()
@@ -1167,7 +1176,6 @@ def parse_date(string, locale=LC_TIME):
# FIXME: this currently only supports numbers, but should also support month
# names, both in the requested locale, and english
- numbers = re.findall(r'(\d+)', string)
year = numbers[indexes['Y']]
if len(year) == 2:
year = 2000 + int(year)
@@ -1180,7 +1188,7 @@ def parse_date(string, locale=LC_TIME):
return date(year, month, day)
-def parse_time(string, locale=LC_TIME):
+def parse_time(string, locale=LC_TIME, format='medium'):
"""Parse a time from a string.
This function uses the time format for the locale as a hint to determine
@@ -1191,29 +1199,42 @@ def parse_time(string, locale=LC_TIME):
:param string: the string containing the time
:param locale: a `Locale` object or a locale identifier
+ :param format: the format to use (see ``get_time_format``)
:return: the parsed time
:rtype: `time`
"""
+ numbers = re.findall(r'(\d+)', string)
+ if not numbers:
+ raise ParseError("No numbers were found in input")
+
# TODO: try ISO format first?
- format = get_time_format(locale=locale).pattern.lower()
- hour_idx = format.index('h')
+ format_str = get_time_format(format=format, locale=locale).pattern.lower()
+ hour_idx = format_str.index('h')
if hour_idx < 0:
- hour_idx = format.index('k')
- min_idx = format.index('m')
- sec_idx = format.index('s')
+ hour_idx = format_str.index('k')
+ min_idx = format_str.index('m')
+ sec_idx = format_str.index('s')
indexes = [(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')]
indexes.sort()
indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)])
- # FIXME: support 12 hour clock, and 0-based hour specification
- # and seconds should be optional, maybe minutes too
- # oh, and time-zones, of course
-
- numbers = re.findall(r'(\d+)', string)
- hour = int(numbers[indexes['H']])
- minute = int(numbers[indexes['M']])
- second = int(numbers[indexes['S']])
+ # TODO: support time zones
+
+ # Check if the format specifies a period to be used;
+ # if it does, look for 'pm' to figure out an offset.
+ hour_offset = 0
+ if 'a' in format_str:
+ if 'pm' in string.lower():
+ hour_offset = 12
+
+ # Parse up to three numbers from the string.
+ minute = second = 0
+ hour = int(numbers[indexes['H']]) + hour_offset
+ if len(numbers) > 1:
+ minute = int(numbers[indexes['M']])
+ if len(numbers) > 2:
+ second = int(numbers[indexes['S']])
return time(hour, minute, second)
diff --git a/tests/test_dates.py b/tests/test_dates.py
index d11758c..2a883ab 100644
--- a/tests/test_dates.py
+++ b/tests/test_dates.py
@@ -775,10 +775,37 @@ def test_format_timedelta():
def test_parse_date():
assert dates.parse_date('4/1/04', locale='en_US') == date(2004, 4, 1)
assert dates.parse_date('01.04.2004', locale='de_DE') == date(2004, 4, 1)
-
-
-def test_parse_time():
- assert dates.parse_time('15:30:00', locale='en_US') == time(15, 30)
+ assert dates.parse_date('2004-04-01', locale='sv_SE', format='short') == date(2004, 4, 1)
+
+
+@pytest.mark.parametrize('input, expected', [
+ # base case, fully qualified time
+ ('15:30:00', time(15, 30)),
+ # test digits
+ ('15:30', time(15, 30)),
+ ('3:30', time(3, 30)),
+ ('00:30', time(0, 30)),
+ # test am parsing
+ ('03:30 am', time(3, 30)),
+ ('3:30:21 am', time(3, 30, 21)),
+ ('3:30 am', time(3, 30)),
+ # test pm parsing
+ ('03:30 pm', time(15, 30)),
+ ('03:30 pM', time(15, 30)),
+ ('03:30 Pm', time(15, 30)),
+ ('03:30 PM', time(15, 30)),
+ # test hour-only parsing
+ ('4 pm', time(16, 0)),
+])
+def test_parse_time(input, expected):
+ assert dates.parse_time(input, locale='en_US') == expected
+
+
+@pytest.mark.parametrize('case', ['', 'a', 'aaa'])
+@pytest.mark.parametrize('func', [dates.parse_date, dates.parse_time])
+def test_parse_errors(case, func):
+ with pytest.raises(dates.ParseError):
+ func(case, locale='en_US')
def test_datetime_format_get_week_number():