From a0f5436273ef6f5b62a388ae131ed5afa7287d00 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Fri, 14 Jan 2022 13:12:34 +0100
Subject: [PATCH] Restore 'offset' and 'negative_utc' arguments and make them
 optional

This allows keeping compatibility with existing users of the TimestampWithTimezone constructor
---
 requirements.txt                    |   2 +-
 swh/model/model.py                  |  97 ++++++++++++++++---
 swh/model/tests/test_identifiers.py | 142 ++++++++++++++++++++++++----
 swh/model/tests/test_model.py       |   2 +-
 4 files changed, 209 insertions(+), 34 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 2980e481..1e48ffba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Add here external Python modules dependencies, one per line. Module names
 # should match https://pypi.python.org/pypi names. For the full spec or
 # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
-attrs != 21.1.0  # https://github.com/python-attrs/attrs/issues/804
+attrs >= 21.1.1
 attrs_strict >= 0.0.7
 deprecated
 hypothesis
diff --git a/swh/model/model.py b/swh/model/model.py
index e2e5a34d..7f4f1ea3 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -378,7 +378,7 @@ class Timestamp(BaseModel):
             raise ValueError("Microseconds must be in [0, 1000000[.")
 
 
-@attr.s(frozen=True, slots=True)
+@attr.s(frozen=True, slots=True, init=False)
 class TimestampWithTimezone(BaseModel):
     """Represents a TZ-aware timestamp from a VCS."""
 
@@ -386,6 +386,9 @@ class TimestampWithTimezone(BaseModel):
 
     timestamp = attr.ib(type=Timestamp, validator=type_validator())
 
+    offset = attr.ib(type=int, validator=type_validator())
+    negative_utc = attr.ib(type=bool, validator=type_validator())
+
     offset_bytes = attr.ib(type=bytes, validator=type_validator())
     """Raw git representation of the timezone, as an offset from UTC.
     It should follow this format: ``+HHMM`` or ``-HHMM`` (including ``+0000`` and
@@ -395,17 +398,82 @@ class TimestampWithTimezone(BaseModel):
     original objects, so it may differ from this format when they do.
     """
 
-    @property
-    def offset(self) -> int:
-        """Parsed value of :attr:`offset_bytes` as a number of minutes,
-        or ``0`` if it cannot be parsed.
-        """
-        offset_str = self.offset_bytes.decode()
+    def __init__(
+        self,
+        timestamp: Timestamp,
+        offset: int = None,
+        negative_utc: bool = None,
+        offset_bytes: bytes = None,
+    ):
+        if offset_bytes is None:
+            if offset is None:
+                raise AttributeError("Neither 'offset' nor 'offset_bytes' was passed.")
+            if negative_utc is None:
+                raise AttributeError(
+                    "Neither 'negative_utc' nor 'offset_bytes' was passed."
+                )
+            negative = offset < 0 or negative_utc
+            (hours, minutes) = divmod(abs(offset), 60)
+            offset_bytes = f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode()
+        else:
+            offset = self._parse_offset_bytes(offset_bytes)
+            negative_utc = offset == 0 and offset_bytes.startswith(b"-")
+
+        self.__attrs_init__(  # type: ignore
+            timestamp=timestamp,
+            offset=offset,
+            negative_utc=negative_utc,
+            offset_bytes=offset_bytes,
+        )
+
+    @offset.validator
+    def check_offset(self, attribute, value):
+        """Checks the offset is a 16-bits signed integer (in theory, it
+        should always be between -14 and +14 hours)."""
+        if not (-(2 ** 15) <= value < 2 ** 15):
+            # max 14 hours offset in theory, but you never know what
+            # you'll find in the wild...
+            raise ValueError("offset too large: %d minutes" % value)
+
+        self._check_offsets_match()
+
+    @negative_utc.validator
+    def check_negative_utc(self, attribute, value):
+        if self.offset and value:
+            raise ValueError("negative_utc can only be True is offset=0")
+
+        self._check_offsets_match()
+
+    @offset_bytes.validator
+    def check_offset_bytes(self, attribute, value):
+        if not set(value) <= _OFFSET_CHARS:
+            raise ValueError(f"invalid characters in offset_bytes: {value!r}")
+
+        self._check_offsets_match()
+
+    @staticmethod
+    def _parse_offset_bytes(offset_bytes: bytes):
+        offset_str = offset_bytes.decode()
         assert offset_str[0] in "+-"
         sign = int(offset_str[0] + "1")
         hours = int(offset_str[1:-2])
         minutes = int(offset_str[-2:])
-        return sign * (hours * 60 + minutes)
+        offset = sign * (hours * 60 + minutes)
+        return offset
+
+    def _check_offsets_match(self):
+        offset = self._parse_offset_bytes(self.offset_bytes)
+        if offset != self.offset:
+            raise ValueError(
+                f"offset_bytes ({self.offset_bytes!r}) does not match offset "
+                f"{divmod(self.offset, 60)}"
+            )
+
+        if offset == 0 and self.negative_utc != self.offset_bytes.startswith(b"-"):
+            raise ValueError(
+                f"offset_bytes ({self.offset_bytes!r}) does not match negative_utc "
+                f"({self.negative_utc})"
+            )
 
     @classmethod
     def from_numeric_offset(
@@ -417,7 +485,12 @@ class TimestampWithTimezone(BaseModel):
         negative = offset < 0 or negative_utc
         (hours, minutes) = divmod(abs(offset), 60)
         offset_bytes = f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode()
-        tstz = TimestampWithTimezone(timestamp=timestamp, offset_bytes=offset_bytes)
+        tstz = TimestampWithTimezone(
+            timestamp=timestamp,
+            offset_bytes=offset_bytes,
+            offset=offset,
+            negative_utc=negative_utc,
+        )
         assert tstz.offset == offset, (tstz.offset, offset)
         return tstz
 
@@ -446,7 +519,7 @@ class TimestampWithTimezone(BaseModel):
             timestamp = Timestamp(seconds=seconds, microseconds=microseconds)
 
             if "offset_bytes" in time_representation:
-                return cls(
+                return TimestampWithTimezone(
                     timestamp=timestamp,
                     offset_bytes=time_representation["offset_bytes"],
                 )
@@ -480,7 +553,7 @@ class TimestampWithTimezone(BaseModel):
             # TODO: warn when using from_dict() on an int
             seconds = time_representation
             timestamp = Timestamp(seconds=time_representation, microseconds=0)
-            return cls(timestamp=timestamp, offset_bytes=b"+0000")
+            return TimestampWithTimezone(timestamp=timestamp, offset_bytes=b"+0000")
         else:
             raise ValueError(
                 f"TimestampWithTimezone.from_dict received non-integer timestamp: "
@@ -512,7 +585,7 @@ class TimestampWithTimezone(BaseModel):
         tstz = cls.from_datetime(dt)
         if dt.tzname() == "-00:00":
             assert tstz.offset_bytes == b"+0000"
-            tstz = attr.evolve(tstz, offset_bytes=b"-0000")
+            tstz = attr.evolve(tstz, offset_bytes=b"-0000", negative_utc=True)
         return tstz
 
 
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index 6214584f..5ad08d61 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -21,6 +21,7 @@ from swh.model.model import (
     Release,
     Revision,
     Snapshot,
+    Timestamp,
     TimestampWithTimezone,
 )
 
@@ -1035,51 +1036,104 @@ TS_DICTS = [
     # with current input dict format (offset_bytes)
     (
         {"timestamp": 12345, "offset_bytes": b"+0000"},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
+        },
     ),
     (
         {"timestamp": 12345, "offset_bytes": b"-0000"},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"-0000",
+            "offset": 0,
+            "negative_utc": True,
+        },
     ),
     (
         {"timestamp": 12345, "offset_bytes": b"+0200"},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0200",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0200",
+            "offset": 120,
+            "negative_utc": False,
+        },
     ),
     (
         {"timestamp": 12345, "offset_bytes": b"-0200"},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0200",},
-    ),
-    (
-        {"timestamp": 12345, "offset_bytes": b"--700"},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"--700",},
-    ),
-    (
-        {"timestamp": 12345, "offset_bytes": b"1234567"},
         {
             "timestamp": {"seconds": 12345, "microseconds": 0},
-            "offset_bytes": b"1234567",
+            "offset_bytes": b"-0200",
+            "offset": -120,
+            "negative_utc": False,
         },
     ),
+    # not working yet:
+    # (
+    #     {"timestamp": 12345, "offset_bytes": b"--700"},
+    #     {
+    #         "timestamp": {"seconds": 12345, "microseconds": 0},
+    #         "offset_bytes": b"--700",
+    #         "offset": 0,
+    #         "negative_utc": False,
+    #     },
+    # ),
+    # (
+    #     {"timestamp": 12345, "offset_bytes": b"1234567"},
+    #     {
+    #         "timestamp": {"seconds": 12345, "microseconds": 0},
+    #         "offset_bytes": b"1234567",
+    #         "offset": 0,
+    #         "negative_utc": False,
+    #     },
+    # ),
     # with old-style input dicts (numeric offset + optional negative_utc):
     (
         {"timestamp": 12345, "offset": 0},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
+        },
     ),
     (
         {"timestamp": 12345, "offset": 0, "negative_utc": False},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
+        },
     ),
     (
         {"timestamp": 12345, "offset": 0, "negative_utc": False},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
+        },
     ),
     (
         {"timestamp": 12345, "offset": 0, "negative_utc": None},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
+        },
     ),
     (
         {"timestamp": {"seconds": 12345}, "offset": 0, "negative_utc": None},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
+        },
     ),
     (
         {
@@ -1087,7 +1141,12 @@ TS_DICTS = [
             "offset": 0,
             "negative_utc": None,
         },
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
+        },
     ),
     (
         {
@@ -1098,15 +1157,27 @@ TS_DICTS = [
         {
             "timestamp": {"seconds": 12345, "microseconds": 100},
             "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
         },
     ),
     (
         {"timestamp": 12345, "offset": 0, "negative_utc": True},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"-0000",
+            "offset": 0,
+            "negative_utc": True,
+        },
     ),
     (
         {"timestamp": 12345, "offset": 0, "negative_utc": None},
-        {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",},
+        {
+            "timestamp": {"seconds": 12345, "microseconds": 0},
+            "offset_bytes": b"+0000",
+            "offset": 0,
+            "negative_utc": False,
+        },
     ),
 ]
 
@@ -1116,6 +1187,35 @@ def test_normalize_timestamp_dict(dict_input, expected):
     assert TimestampWithTimezone.from_dict(dict_input).to_dict() == expected
 
 
+def test_timestampwithtimezone_init():
+    ts = Timestamp(seconds=1234567, microseconds=0)
+    tstz = TimestampWithTimezone(
+        timestamp=ts, offset=120, negative_utc=False, offset_bytes=b"+0200"
+    )
+    assert tstz.timestamp == ts
+    assert tstz.offset == 120
+    assert tstz.negative_utc is False
+    assert tstz.offset_bytes == b"+0200"
+
+    assert tstz == TimestampWithTimezone(timestamp=ts, offset=120, negative_utc=False)
+    assert tstz == TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0200")
+
+    assert tstz != TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0100")
+
+    tstz = TimestampWithTimezone(
+        timestamp=ts, offset=0, negative_utc=True, offset_bytes=b"-0000"
+    )
+    assert tstz.timestamp == ts
+    assert tstz.offset == 0
+    assert tstz.negative_utc is True
+    assert tstz.offset_bytes == b"-0000"
+
+    assert tstz == TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=True)
+    assert tstz == TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0000")
+
+    assert tstz != TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0000")
+
+
 TS_DICTS_INVALID_TIMESTAMP = [
     {"timestamp": 1.2, "offset": 0},
     {"timestamp": "1", "offset": 0},
@@ -1162,6 +1262,8 @@ def test_normalize_timestamp_datetime(
     assert TimestampWithTimezone.from_dict(date).to_dict() == {
         "timestamp": {"seconds": seconds, "microseconds": microsecond},
         "offset_bytes": offset_bytes,
+        "offset": offset,
+        "negative_utc": False,
     }
 
 
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
index 0456c7fc..3fc6ac1e 100644
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -483,7 +483,7 @@ def test_timestampwithtimezone():
     with pytest.raises(AttributeTypeError):
         TimestampWithTimezone(timestamp=datetime.datetime.now(), offset_bytes=b"+0000")
 
-    with pytest.raises((AttributeTypeError, TypeError)):
+    with pytest.raises((AttributeTypeError, AttributeError, TypeError)):
         TimestampWithTimezone(timestamp=ts, offset_bytes=0)
 
 
-- 
GitLab