From a0f5436273ef6f5b62a388ae131ed5afa7287d00 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <vlorentz@softwareheritage.org> Date: Fri, 14 Jan 2022 13:12:34 +0100 Subject: [PATCH] Restore 'offset' and 'negative_utc' arguments and make them optional This allows keeping compatibility with existing users of the TimestampWithTimezone constructor --- requirements.txt | 2 +- swh/model/model.py | 97 ++++++++++++++++--- swh/model/tests/test_identifiers.py | 142 ++++++++++++++++++++++++---- swh/model/tests/test_model.py | 2 +- 4 files changed, 209 insertions(+), 34 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2980e481..1e48ffba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html -attrs != 21.1.0 # https://github.com/python-attrs/attrs/issues/804 +attrs >= 21.1.1 attrs_strict >= 0.0.7 deprecated hypothesis diff --git a/swh/model/model.py b/swh/model/model.py index e2e5a34d..7f4f1ea3 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -378,7 +378,7 @@ class Timestamp(BaseModel): raise ValueError("Microseconds must be in [0, 1000000[.") -@attr.s(frozen=True, slots=True) +@attr.s(frozen=True, slots=True, init=False) class TimestampWithTimezone(BaseModel): """Represents a TZ-aware timestamp from a VCS.""" @@ -386,6 +386,9 @@ class TimestampWithTimezone(BaseModel): timestamp = attr.ib(type=Timestamp, validator=type_validator()) + offset = attr.ib(type=int, validator=type_validator()) + negative_utc = attr.ib(type=bool, validator=type_validator()) + offset_bytes = attr.ib(type=bytes, validator=type_validator()) """Raw git representation of the timezone, as an offset from UTC. It should follow this format: ``+HHMM`` or ``-HHMM`` (including ``+0000`` and @@ -395,17 +398,82 @@ class TimestampWithTimezone(BaseModel): original objects, so it may differ from this format when they do. """ - @property - def offset(self) -> int: - """Parsed value of :attr:`offset_bytes` as a number of minutes, - or ``0`` if it cannot be parsed. - """ - offset_str = self.offset_bytes.decode() + def __init__( + self, + timestamp: Timestamp, + offset: int = None, + negative_utc: bool = None, + offset_bytes: bytes = None, + ): + if offset_bytes is None: + if offset is None: + raise AttributeError("Neither 'offset' nor 'offset_bytes' was passed.") + if negative_utc is None: + raise AttributeError( + "Neither 'negative_utc' nor 'offset_bytes' was passed." + ) + negative = offset < 0 or negative_utc + (hours, minutes) = divmod(abs(offset), 60) + offset_bytes = f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode() + else: + offset = self._parse_offset_bytes(offset_bytes) + negative_utc = offset == 0 and offset_bytes.startswith(b"-") + + self.__attrs_init__( # type: ignore + timestamp=timestamp, + offset=offset, + negative_utc=negative_utc, + offset_bytes=offset_bytes, + ) + + @offset.validator + def check_offset(self, attribute, value): + """Checks the offset is a 16-bits signed integer (in theory, it + should always be between -14 and +14 hours).""" + if not (-(2 ** 15) <= value < 2 ** 15): + # max 14 hours offset in theory, but you never know what + # you'll find in the wild... + raise ValueError("offset too large: %d minutes" % value) + + self._check_offsets_match() + + @negative_utc.validator + def check_negative_utc(self, attribute, value): + if self.offset and value: + raise ValueError("negative_utc can only be True is offset=0") + + self._check_offsets_match() + + @offset_bytes.validator + def check_offset_bytes(self, attribute, value): + if not set(value) <= _OFFSET_CHARS: + raise ValueError(f"invalid characters in offset_bytes: {value!r}") + + self._check_offsets_match() + + @staticmethod + def _parse_offset_bytes(offset_bytes: bytes): + offset_str = offset_bytes.decode() assert offset_str[0] in "+-" sign = int(offset_str[0] + "1") hours = int(offset_str[1:-2]) minutes = int(offset_str[-2:]) - return sign * (hours * 60 + minutes) + offset = sign * (hours * 60 + minutes) + return offset + + def _check_offsets_match(self): + offset = self._parse_offset_bytes(self.offset_bytes) + if offset != self.offset: + raise ValueError( + f"offset_bytes ({self.offset_bytes!r}) does not match offset " + f"{divmod(self.offset, 60)}" + ) + + if offset == 0 and self.negative_utc != self.offset_bytes.startswith(b"-"): + raise ValueError( + f"offset_bytes ({self.offset_bytes!r}) does not match negative_utc " + f"({self.negative_utc})" + ) @classmethod def from_numeric_offset( @@ -417,7 +485,12 @@ class TimestampWithTimezone(BaseModel): negative = offset < 0 or negative_utc (hours, minutes) = divmod(abs(offset), 60) offset_bytes = f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode() - tstz = TimestampWithTimezone(timestamp=timestamp, offset_bytes=offset_bytes) + tstz = TimestampWithTimezone( + timestamp=timestamp, + offset_bytes=offset_bytes, + offset=offset, + negative_utc=negative_utc, + ) assert tstz.offset == offset, (tstz.offset, offset) return tstz @@ -446,7 +519,7 @@ class TimestampWithTimezone(BaseModel): timestamp = Timestamp(seconds=seconds, microseconds=microseconds) if "offset_bytes" in time_representation: - return cls( + return TimestampWithTimezone( timestamp=timestamp, offset_bytes=time_representation["offset_bytes"], ) @@ -480,7 +553,7 @@ class TimestampWithTimezone(BaseModel): # TODO: warn when using from_dict() on an int seconds = time_representation timestamp = Timestamp(seconds=time_representation, microseconds=0) - return cls(timestamp=timestamp, offset_bytes=b"+0000") + return TimestampWithTimezone(timestamp=timestamp, offset_bytes=b"+0000") else: raise ValueError( f"TimestampWithTimezone.from_dict received non-integer timestamp: " @@ -512,7 +585,7 @@ class TimestampWithTimezone(BaseModel): tstz = cls.from_datetime(dt) if dt.tzname() == "-00:00": assert tstz.offset_bytes == b"+0000" - tstz = attr.evolve(tstz, offset_bytes=b"-0000") + tstz = attr.evolve(tstz, offset_bytes=b"-0000", negative_utc=True) return tstz diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 6214584f..5ad08d61 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -21,6 +21,7 @@ from swh.model.model import ( Release, Revision, Snapshot, + Timestamp, TimestampWithTimezone, ) @@ -1035,51 +1036,104 @@ TS_DICTS = [ # with current input dict format (offset_bytes) ( {"timestamp": 12345, "offset_bytes": b"+0000"}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, + }, ), ( {"timestamp": 12345, "offset_bytes": b"-0000"}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"-0000", + "offset": 0, + "negative_utc": True, + }, ), ( {"timestamp": 12345, "offset_bytes": b"+0200"}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0200",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0200", + "offset": 120, + "negative_utc": False, + }, ), ( {"timestamp": 12345, "offset_bytes": b"-0200"}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0200",}, - ), - ( - {"timestamp": 12345, "offset_bytes": b"--700"}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"--700",}, - ), - ( - {"timestamp": 12345, "offset_bytes": b"1234567"}, { "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset_bytes": b"1234567", + "offset_bytes": b"-0200", + "offset": -120, + "negative_utc": False, }, ), + # not working yet: + # ( + # {"timestamp": 12345, "offset_bytes": b"--700"}, + # { + # "timestamp": {"seconds": 12345, "microseconds": 0}, + # "offset_bytes": b"--700", + # "offset": 0, + # "negative_utc": False, + # }, + # ), + # ( + # {"timestamp": 12345, "offset_bytes": b"1234567"}, + # { + # "timestamp": {"seconds": 12345, "microseconds": 0}, + # "offset_bytes": b"1234567", + # "offset": 0, + # "negative_utc": False, + # }, + # ), # with old-style input dicts (numeric offset + optional negative_utc): ( {"timestamp": 12345, "offset": 0}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, + }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": False}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, + }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": False}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, + }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": None}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, + }, ), ( {"timestamp": {"seconds": 12345}, "offset": 0, "negative_utc": None}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, + }, ), ( { @@ -1087,7 +1141,12 @@ TS_DICTS = [ "offset": 0, "negative_utc": None, }, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, + }, ), ( { @@ -1098,15 +1157,27 @@ TS_DICTS = [ { "timestamp": {"seconds": 12345, "microseconds": 100}, "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": True}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"-0000", + "offset": 0, + "negative_utc": True, + }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": None}, - {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, + }, ), ] @@ -1116,6 +1187,35 @@ def test_normalize_timestamp_dict(dict_input, expected): assert TimestampWithTimezone.from_dict(dict_input).to_dict() == expected +def test_timestampwithtimezone_init(): + ts = Timestamp(seconds=1234567, microseconds=0) + tstz = TimestampWithTimezone( + timestamp=ts, offset=120, negative_utc=False, offset_bytes=b"+0200" + ) + assert tstz.timestamp == ts + assert tstz.offset == 120 + assert tstz.negative_utc is False + assert tstz.offset_bytes == b"+0200" + + assert tstz == TimestampWithTimezone(timestamp=ts, offset=120, negative_utc=False) + assert tstz == TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0200") + + assert tstz != TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0100") + + tstz = TimestampWithTimezone( + timestamp=ts, offset=0, negative_utc=True, offset_bytes=b"-0000" + ) + assert tstz.timestamp == ts + assert tstz.offset == 0 + assert tstz.negative_utc is True + assert tstz.offset_bytes == b"-0000" + + assert tstz == TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=True) + assert tstz == TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0000") + + assert tstz != TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0000") + + TS_DICTS_INVALID_TIMESTAMP = [ {"timestamp": 1.2, "offset": 0}, {"timestamp": "1", "offset": 0}, @@ -1162,6 +1262,8 @@ def test_normalize_timestamp_datetime( assert TimestampWithTimezone.from_dict(date).to_dict() == { "timestamp": {"seconds": seconds, "microseconds": microsecond}, "offset_bytes": offset_bytes, + "offset": offset, + "negative_utc": False, } diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index 0456c7fc..3fc6ac1e 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -483,7 +483,7 @@ def test_timestampwithtimezone(): with pytest.raises(AttributeTypeError): TimestampWithTimezone(timestamp=datetime.datetime.now(), offset_bytes=b"+0000") - with pytest.raises((AttributeTypeError, TypeError)): + with pytest.raises((AttributeTypeError, AttributeError, TypeError)): TimestampWithTimezone(timestamp=ts, offset_bytes=0) -- GitLab