Skip to content
Snippets Groups Projects
Commit 71be4617 authored by David Douard's avatar David Douard
Browse files

Add an ExtID object

This object makes it possible to keep, in the SWH Archive, a
  SWHID <-> External object ID
map, e.g. to keep track of Mercurial ids so that the Mercurial
loader can be made more efficient.

Related to T2849.
parent fca36585
No related branches found
Tags v2.1.0
No related merge requests found
......@@ -296,6 +296,7 @@ def hash_git_data(data, git_type, base_algo="sha1"):
"tag",
"snapshot",
"raw_extrinsic_metadata",
"extid",
}
if git_type not in git_object_types:
......
......@@ -823,6 +823,37 @@ def raw_extrinsic_metadata_identifier(metadata: Dict[str, Any]) -> str:
)
def extid_identifier(extid: Dict[str, Any]) -> str:
    """Compute the intrinsic identifier of an ExtID object.

    The identifier is a salted sha1 (git hashing algorithm, with ``extid``
    used as the object type) of a manifest laid out as follows::

        extid_type $StrWithoutSpaces
        extid $Bytes
        target $CoreSwhid

    $StrWithoutSpaces is an ASCII string, and may not contain spaces.

    Newlines in $Bytes are escaped as with other git fields, i.e. by adding
    a space after them.

    Args:
        extid: dictionary holding the ``extid_type``, ``extid`` and
            ``target`` entries of the ExtID object

    Returns:
        str: the intrinsic identifier for `extid`

    """
    # Entries are read in manifest order: type, raw external id, target.
    type_field = extid["extid_type"].encode("ascii")
    value_field = extid["extid"]
    target_field = str(extid["target"]).encode("ascii")

    manifest_headers = [
        (b"extid_type", type_field),
        (b"extid", value_field),
        (b"target", target_field),
    ]
    digest = hash_manifest("extid", manifest_headers)
    return identifier_to_str(digest)
# Type variable for the "object_type" attribute of the SWHID class; it is
# constrained to be either ObjectType or ExtendedObjectType.
_TObjectType = TypeVar("_TObjectType", ObjectType, ExtendedObjectType)
......
......@@ -19,6 +19,7 @@ from .collections import ImmutableDict
from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes
from .identifiers import (
directory_identifier,
extid_identifier,
normalize_timestamp,
origin_identifier,
raw_extrinsic_metadata_identifier,
......@@ -1096,3 +1097,25 @@ class RawExtrinsicMetadata(HashableObject, BaseModel):
d[swhid_key] = CoreSWHID.from_string(d[swhid_key])
return super().from_dict(d)
@attr.s(frozen=True, slots=True)
class ExtID(HashableObject, BaseModel):
    """Association between an external identifier and a core SWHID.

    Attributes are declared with attrs; each one is checked by
    ``type_validator()`` against its declared type.
    """

    object_type: Final = "extid"

    # Kind of external identifier (a ``str``).
    extid_type = attr.ib(type=str, validator=type_validator())
    # Raw value of the external identifier (``bytes``).
    extid = attr.ib(type=bytes, validator=type_validator())
    # Core SWHID that the external identifier is mapped to.
    target = attr.ib(type=CoreSWHID, validator=type_validator())

    # Identifier of this object; defaults to the empty byte string.
    id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"")

    @classmethod
    def from_dict(cls, d):
        """Build an ExtID from a dictionary.

        Only the ``extid``, ``extid_type`` and ``target`` entries of `d` are
        read; ``target`` is parsed with ``CoreSWHID.from_string``.
        """
        fields = {
            "extid": d["extid"],
            "extid_type": d["extid_type"],
            "target": CoreSWHID.from_string(d["target"]),
        }
        return cls(**fields)

    def compute_hash(self) -> bytes:
        """Return the ``extid_identifier`` of this object's dict, as bytes."""
        hex_identifier = extid_identifier(self.to_dict())
        return hash_to_bytes(hex_identifier)
......@@ -15,6 +15,7 @@ from swh.model.model import (
Content,
Directory,
DirectoryEntry,
ExtID,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
......@@ -131,6 +132,11 @@ REVISIONS = [
),
]
# Sample ExtID objects, each one targeting the SWHID of a sample revision.
EXTIDS = [
    ExtID(
        extid_type="git256",
        extid=b"\x03" * 32,
        target=REVISIONS[0].swhid(),
    ),
    ExtID(
        extid_type="hg",
        extid=b"\x04" * 20,
        target=REVISIONS[1].swhid(),
    ),
]
RELEASES = [
Release(
id=hash_to_bytes("8059dc4e17fcd0e51ca3bcd6b80f4577d281fd08"),
......@@ -330,6 +336,7 @@ RAW_EXTRINSIC_METADATA = [
TEST_OBJECTS: Dict[str, Sequence[BaseModel]] = {
"content": CONTENTS,
"directory": DIRECTORIES,
"extid": EXTIDS,
"metadata_authority": METADATA_AUTHORITIES,
"metadata_fetcher": METADATA_FETCHERS,
"origin": ORIGINS,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment