summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Michael Twomey <mick@twomeylee.name> 2013-10-16 19:12:12 +0100
committer Michael Twomey <mick@twomeylee.name> 2013-10-16 19:12:12 +0100
commit e2f5a7a8ad904991b4ce3ff0091e5e1059272536 (patch)
tree d04d05847fc57a690a2e5478114cda8640dc14c6
parent 2e60ca3636e739fb1b68e803a7698f55df2b42af (diff)
download pyiso8601-e2f5a7a8ad904991b4ce3ff0091e5e1059272536.tar.gz
Rewrote the regex, large parts of the code and tests
Added more tests to cover more of the spec and fixed many edge cases.
-rw-r--r-- iso8601/iso8601.py 112
-rw-r--r-- iso8601/test_iso8601.py 32
2 files changed, 110 insertions, 34 deletions
diff --git a/iso8601/iso8601.py b/iso8601/iso8601.py
index eee6741..a121e2e 100644
--- a/iso8601/iso8601.py
+++ b/iso8601/iso8601.py
@@ -8,12 +8,19 @@ datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.iso8601.Utc ...>)
"""
-from datetime import datetime, timedelta, tzinfo
+from datetime import (
+ datetime,
+ timedelta,
+ tzinfo
+)
+import logging
import sys
import re
__all__ = ["parse_date", "ParseError"]
+LOG = logging.getLogger(__name__)
+
if sys.version_info >= (3, 0, 0):
_basestring = str
else:
@@ -21,11 +28,32 @@ else:
# Adapted from http://delete.me.uk/2005/03/iso8601.html
-ISO8601_REGEX = re.compile(r"(?P<year>[0-9]{4})(-(?P<month>[0-9]{1,2})(-(?P<day>[0-9]{1,2})"
- r"((?P<separator>.)(?P<hour>[0-9]{2}):(?P<minute>[0-9]{2})(:(?P<second>[0-9]{2})(\.(?P<fraction>[0-9]+))?)?"
- r"(?P<timezone>Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?"
+ISO8601_REGEX = re.compile(r"""
+ (?P<year>[0-9]{4})
+ (-{0,1}(?P<month>[0-9]{1,2})){1}
+ (-{0,1}(?P<day>[0-9]{1,2})){1}
+ (
+ (?P<separator>[ T])
+ (?P<hour>[0-9]{2})
+ (:{0,1}(?P<minute>[0-9]{2})){0,1}
+ (
+ :{0,1}(?P<second>[0-9]{1,2})
+ (\.(?P<second_fraction>[0-9]+)){0,1}
+ ){0,1}
+ (?P<timezone>
+ Z
+ |
+ (
+ (?P<tz_sign>[-+])
+ (?P<tz_hour>[0-9]{2})
+ :{0,1}
+ (?P<tz_minute>[0-9]{2}){0,1}
+ )
+ ){0,1}
+ ){0,1}
+ """,
+ re.VERBOSE
)
-TIMEZONE_REGEX = re.compile("(?P<prefix>[+-])(?P<hours>[0-9]{2}).(?P<minutes>[0-9]{2})")
class ParseError(Exception):
"""Raised when there is a problem parsing a date string"""
@@ -44,6 +72,7 @@ class Utc(tzinfo):
def dst(self, dt):
return ZERO
+
UTC = Utc()
class FixedOffset(tzinfo):
@@ -54,6 +83,17 @@ class FixedOffset(tzinfo):
self.__offset = timedelta(hours=offset_hours, minutes=offset_minutes)
self.__name = name
+ def __eq__(self, other):
+ if isinstance(other, FixedOffset):
+ return (
+ (other.__offset == self.__offset)
+ and
+ (other.__name == self.__name)
+ )
+ if isinstance(other, tzinfo):
+ return other == self
+ return False
+
def utcoffset(self, dt):
return self.__offset
@@ -64,26 +104,42 @@ class FixedOffset(tzinfo):
return ZERO
def __repr__(self):
- return "<FixedOffset %r>" % self.__name
+ return "<FixedOffset %r %r>" % (self.__name, self.__offset)
+
+def to_int(d, key, default_to_zero=False, default=None):
+ """Pull a value from the dict and convert to int
+
+ :param default_to_zero: If the value is None or empty, treat it as zero
+ :param default: If the value is missing in the dict use this default
-def parse_timezone(tzstring, default_timezone=UTC):
+ """
+ value = d.get(key, default)
+ LOG.debug("Got %r for %r with default %r", value, key, default)
+ if (value in ["", None]) and default_to_zero:
+ return 0
+ if value is None:
+ raise ParseError("Unable to read %s from %s" % (key, d))
+ return int(value)
+
+def parse_timezone(matches, default_timezone=UTC):
"""Parses ISO 8601 time zone specs into tzinfo offsets
"""
- if tzstring == "Z":
+
+ if matches["timezone"] == "Z":
return default_timezone
# This isn't strictly correct, but it's common to encounter dates without
# timezones so I'll assume the default (which defaults to UTC).
# Addresses issue 4.
- if tzstring is None:
+ if matches["timezone"] is None:
return default_timezone
- m = TIMEZONE_REGEX.match(tzstring)
- prefix, hours, minutes = m.groups()
- hours, minutes = int(hours), int(minutes)
- if prefix == "-":
+ sign = matches["tz_sign"]
+ hours = to_int(matches, "tz_hour")
+ minutes = to_int(matches, "tz_minute", default_to_zero=True)
+ description = "%s%02d:%02d" % (sign, hours, minutes)
+ if sign == "-":
hours = -hours
- minutes = -minutes
- return FixedOffset(hours, minutes, tzstring)
+ return FixedOffset(hours, minutes, description)
def parse_date(datestring, default_timezone=UTC):
"""Parses ISO 8601 dates into datetime objects
@@ -99,16 +155,22 @@ def parse_date(datestring, default_timezone=UTC):
if not m:
raise ParseError("Unable to parse date string %r" % datestring)
groups = m.groupdict()
- tz = parse_timezone(groups["timezone"], default_timezone=default_timezone)
- if groups["fraction"] is None:
- groups["fraction"] = 0
- else:
- groups["fraction"] = int(float("0.%s" % groups["fraction"]) * 1e6)
- if groups["second"] is None:
- groups["second"] = 0
+ LOG.debug("Parsed %s into %s with default timezone %s", datestring, groups, default_timezone)
+
+ tz = parse_timezone(groups, default_timezone=default_timezone)
+
+ groups["second_fraction"] = int(float("0.%s" % to_int(groups, "second_fraction", default_to_zero=True)) * 1e6)
+
try:
- return datetime(int(groups["year"]), int(groups["month"]), int(groups["day"]),
- int(groups["hour"]), int(groups["minute"]), int(groups["second"]),
- int(groups["fraction"]), tz)
+ return datetime(
+ year=to_int(groups, "year"),
+ month=to_int(groups, "month"),
+ day=to_int(groups, "day"),
+ hour=to_int(groups, "hour"),
+ minute=to_int(groups, "minute", default_to_zero=True),
+ second=to_int(groups, "second", default_to_zero=True),
+ microsecond=groups["second_fraction"],
+ tzinfo=tz,
+ )
except Exception as e:
raise ParseError(e)
diff --git a/iso8601/test_iso8601.py b/iso8601/test_iso8601.py
index 5e1aa7f..5b5e760 100644
--- a/iso8601/test_iso8601.py
+++ b/iso8601/test_iso8601.py
@@ -1,3 +1,4 @@
+# coding=UTF-8
from __future__ import absolute_import
import datetime
@@ -9,15 +10,10 @@ from iso8601 import iso8601
def test_iso8601_regex():
assert iso8601.ISO8601_REGEX.match("2006-10-11T00:14:33Z")
-def test_timezone_regex():
- assert iso8601.TIMEZONE_REGEX.match("+01:00")
- assert iso8601.TIMEZONE_REGEX.match("+00:00")
- assert iso8601.TIMEZONE_REGEX.match("+01:20")
- assert iso8601.TIMEZONE_REGEX.match("-01:00")
-
def test_parse_no_timezone_different_default():
tz = iso8601.FixedOffset(2, 0, "test offset")
d = iso8601.parse_date("2007-01-01T08:00:00", default_timezone=tz)
+ assert d == datetime.datetime(2007, 1, 1, 8, 0, 0, 0, tz)
assert d.tzinfo == tz
@pytest.mark.parametrize("invalid_date", [
@@ -26,8 +22,9 @@ def test_parse_no_timezone_different_default():
("",),
(None,),
("23",),
+ ("131015T142533Z",),
+ ("131015",),
])
-
def test_parse_invalid_date(invalid_date):
with pytest.raises(iso8601.ParseError) as exc:
iso8601.parse_date(invalid_date)
@@ -41,7 +38,24 @@ def test_parse_invalid_date(invalid_date):
("2006-10-20T15:34:56Z", datetime.datetime(2006, 10, 20, 15, 34, 56, 0, iso8601.UTC)),
("2007-5-7T11:43:55.328Z'", datetime.datetime(2007, 5, 7, 11, 43, 55, 328000, iso8601.UTC)), # http://code.google.com/p/pyiso8601/issues/detail?id=6
("2006-10-20T15:34:56.123Z", datetime.datetime(2006, 10, 20, 15, 34, 56, 123000, iso8601.UTC)),
+ ("2013-10-15T18:30Z", datetime.datetime(2013, 10, 15, 18, 30, 0, 0, iso8601.UTC)),
+ ("2013-10-15T22:30+04", datetime.datetime(2013, 10, 15, 22, 30, 0, 0, iso8601.FixedOffset(4, 0, "+04:00"))), # <time>±hh
+ ("2013-10-15T1130-0700", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(-7, 0, "-07:00"))), # <time>±hhmm
+ ("2013-10-15T15:00-03:30", datetime.datetime(2013, 10, 15, 15, 0, 0, 0, iso8601.FixedOffset(-3, 30, "-03:30"))), # <time>±hh:mm
+ ("2013-10-15T183123Z", datetime.datetime(2013, 10, 15, 18, 31, 23, 0, iso8601.UTC)), # hhmmss
+ ("2013-10-15T1831Z", datetime.datetime(2013, 10, 15, 18, 31, 0, 0, iso8601.UTC)), # hhmm
+ ("2013-10-15T18Z", datetime.datetime(2013, 10, 15, 18, 0, 0, 0, iso8601.UTC)), # hh
+ ("20131015T18:30Z", datetime.datetime(2013, 10, 15, 18, 30, 0, 0, iso8601.UTC)), # YYYYMMDD
])
-
def test_parse_valid_date(valid_date, expected_datetime):
- assert iso8601.parse_date(valid_date) == expected_datetime
+ parsed = iso8601.parse_date(valid_date)
+ assert parsed.year == expected_datetime.year
+ assert parsed.month == expected_datetime.month
+ assert parsed.day == expected_datetime.day
+ assert parsed.hour == expected_datetime.hour
+ assert parsed.minute == expected_datetime.minute
+ assert parsed.second == expected_datetime.second
+ assert parsed.microsecond == expected_datetime.microsecond
+ assert parsed.tzinfo == expected_datetime.tzinfo
+ assert parsed == expected_datetime
+ assert parsed.isoformat() == expected_datetime.isoformat()