Skip to content
Snippets Groups Projects
Commit 43863306 authored by vlorentz's avatar vlorentz
Browse files

identifiers: Properly define the behavior of raw_extrinsic_metadata on negative timestamps.

The rounding algorithm wasn't specified
parent 3ce41250
No related branches found
No related tags found
No related merge requests found
......@@ -759,9 +759,9 @@ def raw_extrinsic_metadata_identifier(metadata: Dict[str, Any]) -> str:
$ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
origins and 'emd' for raw extrinsic metadata)
$Timestamp is a decimal representation of the integer number of seconds since
the UNIX epoch (1970-01-01 00:00:00 UTC), with no leading '0'
(unless the timestamp value is zero) and no timezone.
$Timestamp is a decimal representation of the rounded-down integer number of
seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
with no leading '0' (unless the timestamp value is zero) and no timezone.
It may be negative by prefixing it with a '-', which must not be followed
by a '0'.
......@@ -772,7 +772,19 @@ def raw_extrinsic_metadata_identifier(metadata: Dict[str, Any]) -> str:
str: the intrinsic identifier for `metadata`
"""
timestamp = metadata["discovery_date"].timestamp()
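# Truncating the microseconds after converting to UTC is equivalent to
# math.floor(dt.timestamp()), i.e. it rounds down. int(dt.timestamp()) is
# not used because it rounds toward zero, which would map two different
# seconds (-1 and 0) onto the 0 timestamp.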
#
# This should never be an issue in practice as Software Heritage didn't
# start collecting metadata before 2015.
timestamp = (
metadata["discovery_date"]
.astimezone(datetime.timezone.utc)
.replace(microsecond=0)
.timestamp()
)
assert timestamp.is_integer()
headers = [
(b"target", str(metadata["target"]).encode()),
......
......@@ -920,6 +920,26 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
"5c13f20ba336e44549baf3d7b9305b027ec9f43d",
)
def test_noninteger_timezone(self):
    """The discovery_date must be converted to UTC before its microseconds
    are truncated.

    The date below uses a UTC offset of -42 microseconds and a local time
    that is 42 microseconds short of a full second, so it only lands on a
    whole second once expressed in UTC. Its identifier must match the one
    computed for ``self.minimal``."""
    sub_second_tz = datetime.timezone(datetime.timedelta(microseconds=-42))
    shifted_date = datetime.datetime(
        2021, 1, 25, 11, 27, 50, 1_000_000 - 42, tzinfo=sub_second_tz,
    )
    metadata = {**self.minimal, "discovery_date": shifted_date}

    compute_id = identifiers.raw_extrinsic_metadata_identifier

    # Same identifier as the reference metadata...
    self.assertEqual(compute_id(self.minimal), compute_id(metadata))
    # ...and that identifier is the known, pinned value.
    self.assertEqual(
        compute_id(metadata), "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
    )
def test_negative_timestamp(self):
metadata = {
**self.minimal,
......@@ -948,6 +968,62 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
"895d0821a2991dd376ddc303424aceb7c68280f9",
)
def test_epoch(self):
    """A discovery_date exactly at the UNIX epoch is serialized as
    ``discovery_date 0`` in the manifest that gets hashed."""
    epoch = datetime.datetime(
        1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc,
    )
    metadata = {**self.minimal, "discovery_date": epoch}

    # Expected manifest: header, one line per field, blank line, payload.
    manifest_parts = (
        b"raw_extrinsic_metadata 201\0",
        b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n",
        b"discovery_date 0\n",
        b"authority forge https://forge.softwareheritage.org/\n",
        b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n",
        b"format json\n",
        b"\n",
        b'{"foo": "bar"}',
    )
    manifest = b"".join(manifest_parts)

    compute_id = identifiers.raw_extrinsic_metadata_identifier

    # The identifier is the SHA1 of the manifest...
    self.assertEqual(compute_id(metadata), hashlib.sha1(manifest).hexdigest())
    # ...which is also pinned to a known value.
    self.assertEqual(
        compute_id(metadata), "27a53df54ace35ebd910493cdc70b334d6b7cb88",
    )
def test_negative_epoch(self):
    """A discovery_date one microsecond past 1969-12-31 23:59:59 UTC is
    serialized as ``discovery_date -1``: the timestamp is rounded down,
    not toward zero."""
    just_before_epoch = datetime.datetime(
        1969, 12, 31, 23, 59, 59, 1, tzinfo=datetime.timezone.utc,
    )
    metadata = {**self.minimal, "discovery_date": just_before_epoch}

    # Expected manifest: header, one line per field, blank line, payload.
    manifest_parts = (
        b"raw_extrinsic_metadata 202\0",
        b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n",
        b"discovery_date -1\n",
        b"authority forge https://forge.softwareheritage.org/\n",
        b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n",
        b"format json\n",
        b"\n",
        b'{"foo": "bar"}',
    )
    manifest = b"".join(manifest_parts)

    compute_id = identifiers.raw_extrinsic_metadata_identifier

    # The identifier is the SHA1 of the manifest...
    self.assertEqual(compute_id(metadata), hashlib.sha1(manifest).hexdigest())
    # ...which is also pinned to a known value.
    self.assertEqual(
        compute_id(metadata), "be7154a8fd49d87f81547ea634d1e2152907d089",
    )
origin_example = {
"url": "https://github.com/torvalds/linux",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment