diff --git a/swh/fuse/cache.py b/swh/fuse/cache.py
index 5c4e7ec874ee0a656d5048f79bd8bb61e07f6f27..c4299b16e709295e3c2bf8c623972e3c5627d039 100644
--- a/swh/fuse/cache.py
+++ b/swh/fuse/cache.py
@@ -23,7 +23,7 @@
 from swh.fuse.fs.artifact import RevisionHistoryShardByDate
 from swh.fuse.fs.entry import FuseDirEntry, FuseEntry
 from swh.fuse.fs.mountpoint import CacheDir, OriginDir
 from swh.model.exceptions import ValidationError
-from swh.model.identifiers import REVISION, SWHID, parse_swhid
+from swh.model.identifiers import CoreSWHID, ObjectType
 from swh.web.client.client import ORIGIN_VISIT, typify_json
 
@@ -79,7 +79,7 @@ class FuseCache:
         await self.blob.__aexit__()
         await self.history.__aexit__()
 
-    async def get_cached_swhids(self) -> AsyncGenerator[SWHID, None]:
+    async def get_cached_swhids(self) -> AsyncGenerator[CoreSWHID, None]:
         """ Return a list of all previously cached SWHID """
         # Use the metadata db since it should always contain all accessed SWHIDs
@@ -88,7 +88,7 @@ class FuseCache:
         )
         swhids = await metadata_cursor.fetchall()
         for raw_swhid in swhids:
-            yield parse_swhid(raw_swhid[0])
+            yield CoreSWHID.from_string(raw_swhid[0])
 
     async def get_cached_visits(self) -> AsyncGenerator[str, None]:
         """ Return a list of all previously cached visit URL """
@@ -149,14 +149,18 @@ class MetadataCache(AbstractCache):
         );
     """
 
-    async def get(self, swhid: SWHID, typify: bool = True) -> Any:
+    async def get(self, swhid: CoreSWHID, typify: bool = True) -> Any:
         cursor = await self.conn.execute(
             "select metadata from metadata_cache where swhid=?", (str(swhid),)
         )
         cache = await cursor.fetchone()
         if cache:
             metadata = json.loads(cache[0])
-            return typify_json(metadata, swhid.object_type) if typify else metadata
+            return (
+                typify_json(metadata, swhid.object_type.name.lower())
+                if typify
+                else metadata
+            )
         else:
             return None
@@ -178,10 +182,10 @@ class MetadataCache(AbstractCache):
         else:
             return None
 
-    async def set(self, swhid: SWHID, metadata: Any) -> None:
+    async def set(self, swhid: CoreSWHID, metadata: Any) -> None:
         # Fill in the date column for revisions (used as cache for history/by-date/)
         swhid_date = ""
-        if swhid.object_type == REVISION:
+        if swhid.object_type == ObjectType.REVISION:
             date = dateutil.parser.parse(metadata["date"])
             swhid_date = RevisionHistoryShardByDate.DATE_FMT.format(
                 year=date.year, month=date.month, day=date.day
@@ -200,7 +204,7 @@ class MetadataCache(AbstractCache):
             )
         await self.conn.commit()
 
-    async def remove(self, swhid: SWHID) -> None:
+    async def remove(self, swhid: CoreSWHID) -> None:
         await self.conn.execute(
             "delete from metadata_cache where swhid=?", (str(swhid),),
         )
@@ -223,7 +227,7 @@ class BlobCache(AbstractCache):
         );
     """
 
-    async def get(self, swhid: SWHID) -> Optional[bytes]:
+    async def get(self, swhid: CoreSWHID) -> Optional[bytes]:
         cursor = await self.conn.execute(
             "select blob from blob_cache where swhid=?", (str(swhid),)
         )
         cache = await cursor.fetchone()
@@ -234,13 +238,13 @@ class BlobCache(AbstractCache):
         else:
             return None
 
-    async def set(self, swhid: SWHID, blob: bytes) -> None:
+    async def set(self, swhid: CoreSWHID, blob: bytes) -> None:
         await self.conn.execute(
             "insert into blob_cache values (?, ?)", (str(swhid), blob)
         )
         await self.conn.commit()
 
-    async def remove(self, swhid: SWHID) -> None:
+    async def remove(self, swhid: CoreSWHID) -> None:
         await self.conn.execute(
             "delete from blob_cache where swhid=?", (str(swhid),),
         )
@@ -277,7 +281,7 @@ class HistoryCache(AbstractCache):
         select * from dfs limit -1 offset 1
     """
 
-    async def get(self, swhid: SWHID) -> Optional[List[SWHID]]:
+    async def get(self, swhid: CoreSWHID) -> Optional[List[CoreSWHID]]:
         cursor = await self.conn.execute(self.HISTORY_REC_QUERY, (str(swhid),),)
         cache = await cursor.fetchall()
         if not cache:
@@ -286,14 +290,14 @@ class HistoryCache(AbstractCache):
         for row in cache:
             parent = row[0]
             try:
-                history.append(parse_swhid(parent))
+                history.append(CoreSWHID.from_string(parent))
             except ValidationError:
                 logging.warning("Cannot parse object from history cache: %s", parent)
         return history
 
     async def get_with_date_prefix(
-        self, swhid: SWHID, date_prefix: str
-    ) -> List[Tuple[SWHID, str]]:
+        self, swhid: CoreSWHID, date_prefix: str
+    ) -> List[Tuple[CoreSWHID, str]]:
         cursor = await self.conn.execute(
             f"""
             select swhid, date from ( {self.HISTORY_REC_QUERY} ) as history
@@ -310,7 +314,7 @@ class HistoryCache(AbstractCache):
         for row in cache:
             parent, date = row[0], row[1]
             try:
-                history.append((parse_swhid(parent), date))
+                history.append((CoreSWHID.from_string(parent), date))
             except ValidationError:
                 logging.warning("Cannot parse object from history cache: %s", parent)
         return history
diff --git a/swh/fuse/cli.py b/swh/fuse/cli.py
index 43b111e42b89a318f18f8c71fe8cfc4f69a9f763..8f5bf38b94cabb413b65f13a4c56071623cb0c8c 100644
--- a/swh/fuse/cli.py
+++ b/swh/fuse/cli.py
@@ -13,7 +13,7 @@
 import click
 
 from swh.core.cli import CONTEXT_SETTINGS
 from swh.core.cli import swh as swh_cli_group
-from swh.model.cli import SWHIDParamType
+from swh.model.cli import CoreSWHIDParamType
 
 # All generic config code should reside in swh.core.config
 DEFAULT_CONFIG_PATH = os.environ.get(
@@ -97,7 +97,7 @@ def fuse(ctx, config_file):
     metavar="PATH",
     type=click.Path(exists=True, dir_okay=True, file_okay=False),
 )
-@click.argument("swhids", nargs=-1, metavar="[SWHID]...", type=SWHIDParamType())
+@click.argument("swhids", nargs=-1, metavar="[SWHID]...", type=CoreSWHIDParamType())
 @click.option(
     "-f/-d",
     "--foreground/--daemon",
diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py
index 85651c1b0cabcd87233a3768b7a659b25e05f04d..78ddf3d3ee7aed5ef997812c174f17b6934c2443 100644
--- a/swh/fuse/fs/artifact.py
+++ b/swh/fuse/fs/artifact.py
@@ -20,7 +20,8 @@ from swh.fuse.fs.entry import (
     FuseSymlinkEntry,
 )
 from swh.model.from_disk import DentryPerms
-from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT, SWHID
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.identifiers import CoreSWHID, ObjectType
 
 SWHID_REGEXP = r"swh:1:(cnt|dir|rel|rev|snp):[0-9a-f]{40}"
 
@@ -41,7 +42,7 @@ class Content(FuseFileEntry):
     directory, the permissions of the `archive/SWHID` file will be arbitrary
     and not meaningful (e.g., `0x644`). """
 
-    swhid: SWHID
+    swhid: CoreSWHID
     prefetch: Any = None
 
     async def get_content(self) -> bytes:
@@ -73,7 +74,7 @@ class Directory(FuseDirEntry):
     So it is possible that, in the context of a directory, a file is presented
     as writable, whereas actually writing to it will fail with `EPERM`. """
 
-    swhid: SWHID
+    swhid: CoreSWHID
 
     async def compute_entries(self) -> AsyncIterator[FuseEntry]:
         metadata = await self.fuse.get_metadata(self.swhid)
@@ -84,7 +85,7 @@ class Directory(FuseDirEntry):
                 # Archived permissions for directories are always set to
                 # 0o040000 so use a read-only permission instead
                 int(EntryMode.RDONLY_DIR)
-                if swhid.object_type == DIRECTORY
+                if swhid.object_type == ObjectType.DIRECTORY
                 else entry["perms"]
             )
 
@@ -101,7 +102,7 @@ class Directory(FuseDirEntry):
                     FuseSymlinkEntry, name=name, target=target,
                 )
             # 2. Regular file
-            elif swhid.object_type == CONTENT:
+            elif swhid.object_type == ObjectType.CONTENT:
                 yield self.create_child(
                     Content,
                     name=name,
@@ -112,12 +113,12 @@ class Directory(FuseDirEntry):
                     prefetch=entry,
                 )
             # 3. Regular directory
-            elif swhid.object_type == DIRECTORY:
+            elif swhid.object_type == ObjectType.DIRECTORY:
                 yield self.create_child(
                     Directory, name=name, mode=mode, swhid=swhid,
                 )
             # 4. Submodule
-            elif swhid.object_type == REVISION:
+            elif swhid.object_type == ObjectType.REVISION:
                 try:
                     # Make sure the revision metadata is fetched and create a
                     # symlink to distinguish it with regular directories
@@ -159,7 +160,7 @@ class Revision(FuseDirEntry):
     - `meta.json`: metadata for the current node, as a symlink pointing to the
       relevant `archive/<SWHID>.json` file """
 
-    swhid: SWHID
+    swhid: CoreSWHID
 
     async def compute_entries(self) -> AsyncIterator[FuseEntry]:
         metadata = await self.fuse.get_metadata(self.swhid)
@@ -202,7 +203,7 @@
 class RevisionParents(FuseDirEntry):
     """ Revision virtual `parents/` directory """
 
-    parents: List[SWHID]
+    parents: List[CoreSWHID]
 
     async def compute_entries(self) -> AsyncIterator[FuseEntry]:
         root_path = self.get_relative_root_path()
@@ -218,7 +219,7 @@
 class RevisionHistory(FuseDirEntry):
     """ Revision virtual `history/` directory """
 
-    swhid: SWHID
+    swhid: CoreSWHID
 
     async def prefill_by_date_cache(self, by_date_dir: FuseDirEntry) -> None:
         history = await self.fuse.get_history(self.swhid)
@@ -272,7 +273,7 @@
 class RevisionHistoryShardByDate(FuseDirEntry):
     """ Revision virtual `history/by-date` sharded directory """
 
-    history_swhid: SWHID
+    history_swhid: CoreSWHID
     prefix: str = field(default="")
     is_status_done: bool = field(default=False)
@@ -285,7 +286,7 @@ class RevisionHistoryShardByDate(FuseDirEntry):
         name: str = field(init=False, default=".status")
         mode: int = field(init=False, default=int(EntryMode.RDONLY_FILE))
-        history_swhid: SWHID
+        history_swhid: CoreSWHID
 
         def __post_init__(self):
             super().__post_init__()
@@ -355,7 +356,7 @@
 class RevisionHistoryShardByHash(FuseDirEntry):
     """ Revision virtual `history/by-hash` sharded directory """
 
-    history_swhid: SWHID
+    history_swhid: CoreSWHID
     prefix: str = field(default="")
 
     SHARDING_LENGTH = 2
@@ -367,7 +368,7 @@ class RevisionHistoryShardByHash(FuseDirEntry):
         if self.prefix:
             root_path = self.get_relative_root_path()
             for swhid in history:
-                if swhid.object_id.startswith(self.prefix):
+                if swhid.object_id.startswith(hash_to_bytes(self.prefix)):
                     yield self.create_child(
                         FuseSymlinkEntry,
                         name=str(swhid),
@@ -377,7 +378,7 @@ class RevisionHistoryShardByHash(FuseDirEntry):
         else:
             sharded_dirs = set()
             for swhid in history:
-                next_prefix = swhid.object_id[: self.SHARDING_LENGTH]
+                next_prefix = hash_to_hex(swhid.object_id)[: self.SHARDING_LENGTH]
                 if next_prefix not in sharded_dirs:
                     sharded_dirs.add(next_prefix)
                     yield self.create_child(
@@ -393,7 +394,7 @@
 class RevisionHistoryShardByPage(FuseDirEntry):
     """ Revision virtual `history/by-page` sharded directory """
 
-    history_swhid: SWHID
+    history_swhid: CoreSWHID
     prefix: Optional[int] = field(default=None)
 
     PAGE_SIZE = 10_000
@@ -445,16 +446,16 @@ class Release(FuseDirEntry):
     - `meta.json`: metadata for the current node, as a symlink pointing to the
       relevant `archive/<SWHID>.json` file """
 
-    swhid: SWHID
+    swhid: CoreSWHID
 
-    async def find_root_directory(self, swhid: SWHID) -> Optional[SWHID]:
-        if swhid.object_type == RELEASE:
+    async def find_root_directory(self, swhid: CoreSWHID) -> Optional[CoreSWHID]:
+        if swhid.object_type == ObjectType.RELEASE:
             metadata = await self.fuse.get_metadata(swhid)
             return await self.find_root_directory(metadata["target"])
-        elif swhid.object_type == REVISION:
+        elif swhid.object_type == ObjectType.REVISION:
             metadata = await self.fuse.get_metadata(swhid)
             return metadata["directory"]
-        elif swhid.object_type == DIRECTORY:
+        elif swhid.object_type == ObjectType.DIRECTORY:
             return swhid
         else:
             return None
@@ -493,10 +494,10 @@
 class ReleaseType(FuseFileEntry):
     """ Release type virtual file """
 
-    target_type: str
+    target_type: ObjectType
 
     async def get_content(self) -> bytes:
-        return str.encode(self.target_type + "\n")
+        return str.encode(self.target_type.name.lower() + "\n")
 
 
 @dataclass
@@ -512,7 +513,7 @@ class Snapshot(FuseDirEntry):
     ``tags`` directory containing a ``v1.0`` symlink pointing to the branch
     target SWHID. """
 
-    swhid: SWHID
+    swhid: CoreSWHID
     prefix: str = field(default="")
 
     async def compute_entries(self) -> AsyncIterator[FuseEntry]:
@@ -627,9 +628,9 @@ class OriginVisit(FuseDirEntry):
 
 
 OBJTYPE_GETTERS = {
-    CONTENT: Content,
-    DIRECTORY: Directory,
-    REVISION: Revision,
-    RELEASE: Release,
-    SNAPSHOT: Snapshot,
+    ObjectType.CONTENT: Content,
+    ObjectType.DIRECTORY: Directory,
+    ObjectType.REVISION: Revision,
+    ObjectType.RELEASE: Release,
+    ObjectType.SNAPSHOT: Snapshot,
 }
diff --git a/swh/fuse/fs/mountpoint.py b/swh/fuse/fs/mountpoint.py
index 9c302d88ba560a1ffadd65d8ea67a6a7e8ed4807..a894f97f41b5337b2e3d6c2eb55cc38a06342419 100644
--- a/swh/fuse/fs/mountpoint.py
+++ b/swh/fuse/fs/mountpoint.py
@@ -18,7 +18,8 @@ from swh.fuse.fs.entry import (
     FuseSymlinkEntry,
 )
 from swh.model.exceptions import ValidationError
-from swh.model.identifiers import CONTENT, SWHID, parse_swhid
+from swh.model.hashutil import hash_to_hex
+from swh.model.identifiers import CoreSWHID, ObjectType
 
 JSON_SUFFIX = ".json"
 
@@ -60,7 +61,7 @@ class ArchiveDir(FuseDirEntry):
         # On the fly mounting of a new artifact
         try:
             if name.endswith(JSON_SUFFIX):
-                swhid = parse_swhid(name[: -len(JSON_SUFFIX)])
+                swhid = CoreSWHID.from_string(name[: -len(JSON_SUFFIX)])
                 return self.create_child(
                     MetaEntry,
                     name=f"{swhid}{JSON_SUFFIX}",
@@ -68,14 +69,14 @@ class ArchiveDir(FuseDirEntry):
                     swhid=swhid,
                 )
             else:
-                swhid = parse_swhid(name)
+                swhid = CoreSWHID.from_string(name)
                 await self.fuse.get_metadata(swhid)
                 return self.create_child(
                     OBJTYPE_GETTERS[swhid.object_type],
                     name=str(swhid),
                     mode=int(
                         EntryMode.RDONLY_FILE
-                        if swhid.object_type == CONTENT
+                        if swhid.object_type == ObjectType.CONTENT
                         else EntryMode.RDONLY_DIR
                     ),
                     swhid=swhid,
@@ -89,7 +90,7 @@ class MetaEntry(FuseFileEntry):
     """ An entry for a `archive/<SWHID>.json` file, containing all the SWHID's
     metadata from the Software Heritage archive. """
""" - swhid: SWHID + swhid: CoreSWHID async def get_content(self) -> bytes: # Make sure the metadata is in cache @@ -162,7 +163,7 @@ class CacheDir(FuseDirEntry): async def compute_entries(self) -> AsyncIterator[FuseEntry]: root_path = self.get_relative_root_path() async for swhid in self.fuse.cache.get_cached_swhids(): - if not swhid.object_id.startswith(self.prefix): + if not hash_to_hex(swhid.object_id).startswith(self.prefix): continue yield self.create_child( @@ -180,7 +181,7 @@ class CacheDir(FuseDirEntry): try: if name.endswith(JSON_SUFFIX): name = name[: -len(JSON_SUFFIX)] - swhid = parse_swhid(name) + swhid = CoreSWHID.from_string(name) await self.fuse.cache.metadata.remove(swhid) await self.fuse.cache.blob.remove(swhid) except ValidationError: @@ -189,7 +190,7 @@ class CacheDir(FuseDirEntry): async def compute_entries(self) -> AsyncIterator[FuseEntry]: prefixes = set() async for swhid in self.fuse.cache.get_cached_swhids(): - prefixes.add(swhid.object_id[:2]) + prefixes.add(hash_to_hex(swhid.object_id)[:2]) for prefix in prefixes: yield self.create_child( diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py index d18fa3bfe6a7bff1b6471555ffd264e338de57d7..ec27fb96a030021b8f0d995fe730b23a49370821 100644 --- a/swh/fuse/fuse.py +++ b/swh/fuse/fuse.py @@ -21,7 +21,7 @@ from swh.fuse import LOGGER_NAME from swh.fuse.cache import FuseCache from swh.fuse.fs.entry import FuseDirEntry, FuseEntry, FuseFileEntry, FuseSymlinkEntry from swh.fuse.fs.mountpoint import Root -from swh.model.identifiers import CONTENT, REVISION, SWHID +from swh.model.identifiers import CoreSWHID, ObjectType from swh.web.client.client import WebAPIClient @@ -81,7 +81,7 @@ class Fuse(pyfuse3.Operations): except KeyError: raise pyfuse3.FUSEError(errno.ENOENT) - async def get_metadata(self, swhid: SWHID) -> Any: + async def get_metadata(self, swhid: CoreSWHID) -> Any: """ Retrieve metadata for a given SWHID using Software Heritage API """ cache = await self.cache.metadata.get(swhid) @@ -100,11 +100,11 @@ class Fuse(pyfuse3.Operations): self.logger.error("Cannot fetch metadata for object %s: %s", swhid, err) raise - async def get_blob(self, swhid: SWHID) -> bytes: + async def get_blob(self, swhid: CoreSWHID) -> bytes: """ Retrieve the blob bytes for a given content SWHID using Software Heritage API """ - if swhid.object_type != CONTENT: + if swhid.object_type != ObjectType.CONTENT: raise pyfuse3.FUSEError(errno.EINVAL) # Make sure the metadata cache is also populated with the given SWHID @@ -126,10 +126,10 @@ class Fuse(pyfuse3.Operations): self.logger.error("Cannot fetch blob for object %s: %s", swhid, err) raise - async def get_history(self, swhid: SWHID) -> List[SWHID]: + async def get_history(self, swhid: CoreSWHID) -> List[CoreSWHID]: """ Retrieve a revision's history using Software Heritage Graph API """ - if swhid.object_type != REVISION: + if swhid.object_type != ObjectType.REVISION: raise pyfuse3.FUSEError(errno.EINVAL) cache = await self.cache.history.get(swhid) @@ -332,7 +332,7 @@ class Fuse(pyfuse3.Operations): raise pyfuse3.FUSEError(errno.ENOENT) -async def main(swhids: List[SWHID], root_path: Path, conf: Dict[str, Any]) -> None: +async def main(swhids: List[CoreSWHID], root_path: Path, conf: Dict[str, Any]) -> None: """ swh-fuse CLI entry-point """ # Use pyfuse3 asyncio layer to match the rest of Software Heritage codebase diff --git a/swh/fuse/tests/api_url.py b/swh/fuse/tests/api_url.py index 60c47cc337f17364ee93f34b02d75f16bde823f3..5bcf2ec16decbb1ef7943d3403adabce74a61674 100644 --- 
--- a/swh/fuse/tests/api_url.py
+++ b/swh/fuse/tests/api_url.py
@@ -6,42 +6,35 @@
 from enum import Enum
 from typing import Union
 
-from swh.model.identifiers import (
-    CONTENT,
-    DIRECTORY,
-    RELEASE,
-    REVISION,
-    SNAPSHOT,
-    SWHID,
-    parse_swhid,
-)
+from swh.model.hashutil import hash_to_hex
+from swh.model.identifiers import CoreSWHID, ObjectType
 
 GRAPH_API_REQUEST = Enum("GRAPH_API_REQUEST", "HISTORY")
 
 
-def swhid_to_web_url(swhid: Union[SWHID, str], raw: bool = False) -> str:
+def swhid_to_web_url(swhid: Union[CoreSWHID, str], raw: bool = False) -> str:
     if isinstance(swhid, str):
-        swhid = parse_swhid(swhid)
+        swhid = CoreSWHID.from_string(swhid)
 
     prefix = {
-        CONTENT: "content/sha1_git:",
-        DIRECTORY: "directory/",
-        REVISION: "revision/",
-        RELEASE: "release/",
-        SNAPSHOT: "snapshot/",
+        ObjectType.CONTENT: "content/sha1_git:",
+        ObjectType.DIRECTORY: "directory/",
+        ObjectType.REVISION: "revision/",
+        ObjectType.RELEASE: "release/",
+        ObjectType.SNAPSHOT: "snapshot/",
     }
 
-    url = f"{prefix[swhid.object_type]}{swhid.object_id}/"
+    url = f"{prefix[swhid.object_type]}{hash_to_hex(swhid.object_id)}/"
     if raw:
         url += "raw/"
 
     return url
 
 
 def swhid_to_graph_url(
-    swhid: Union[SWHID, str], request_type: GRAPH_API_REQUEST
+    swhid: Union[CoreSWHID, str], request_type: GRAPH_API_REQUEST
 ) -> str:
     if isinstance(swhid, str):
-        swhid = parse_swhid(swhid)
+        swhid = CoreSWHID.from_string(swhid)
 
     prefix = {
         GRAPH_API_REQUEST.HISTORY: "graph/visit/edges/",
diff --git a/swh/fuse/tests/data/gen-api-data.py b/swh/fuse/tests/data/gen-api-data.py
index edc7f6edb3a3dff8df45768ffd9444d59347a822..42f74c670fe72f0a877ab990985f639f6e01b2f4 100755
--- a/swh/fuse/tests/data/gen-api-data.py
+++ b/swh/fuse/tests/data/gen-api-data.py
@@ -23,15 +23,8 @@ from swh.fuse.tests.data.config import (
     ORIGIN_URL,
     REV_SMALL_HISTORY,
 )
-from swh.model.identifiers import (
-    CONTENT,
-    DIRECTORY,
-    RELEASE,
-    REVISION,
-    SNAPSHOT,
-    SWHID,
-    parse_swhid,
-)
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import CoreSWHID, ObjectType
 
 API_URL_real = "https://archive.softwareheritage.org/api/1"
 API_URL_test = "https://invalid-test-only.archive.softwareheritage.org/api/1"
@@ -43,7 +36,7 @@
 API_TOKEN = ""
 
 MOCK_ARCHIVE: Dict[str, Any] = {}
 # Temporary map (swhid -> metadata) to ease data generation
-METADATA: Dict[SWHID, Any] = {}
+METADATA: Dict[CoreSWHID, Any] = {}
 
 
 def get_from_api(endpoint: str) -> str:
@@ -51,19 +44,8 @@ def get_from_api(endpoint: str) -> str:
     return requests.get(f"{API_URL_real}/{endpoint}", headers=headers).text
 
 
-def get_short_type(object_type: str) -> str:
-    short_type = {
-        CONTENT: "cnt",
-        DIRECTORY: "dir",
-        REVISION: "rev",
-        RELEASE: "rel",
-        SNAPSHOT: "snp",
-    }
-    return short_type[object_type]
-
-
 def generate_archive_web_api(
-    swhid: SWHID, raw: bool = False, recursive: bool = False
+    swhid: CoreSWHID, raw: bool = False, recursive: bool = False
 ) -> None:
     # Already in mock archive
     if swhid in METADATA and not raw:
@@ -81,17 +63,20 @@ def generate_archive_web_api(
     # Retrieve additional needed data for different artifacts (eg: content's
     # blob data, release target, etc.)
     if recursive:
-        if swhid.object_type == CONTENT:
+        if swhid.object_type == ObjectType.CONTENT:
             generate_archive_web_api(swhid, raw=True)
-        elif swhid.object_type == RELEASE:
+        elif swhid.object_type == ObjectType.RELEASE:
             target_type = METADATA[swhid]["target_type"]
             target_id = METADATA[swhid]["target"]
-            target = parse_swhid(f"swh:1:{get_short_type(target_type)}:{target_id}")
+            target = CoreSWHID(
+                object_type=ObjectType[target_type.upper()],
+                object_id=hash_to_bytes(target_id),
+            )
             generate_archive_web_api(target, recursive=True)
 
 
-def generate_archive_graph_api(swhid: SWHID) -> None:
-    if swhid.object_type == REVISION:
+def generate_archive_graph_api(swhid: CoreSWHID) -> None:
+    if swhid.object_type == ObjectType.REVISION:
         # Empty history for all revisions (except REV_SMALL_HISTORY used in tests)
         url = swhid_to_graph_url(swhid, GRAPH_API_REQUEST.HISTORY)
         MOCK_ARCHIVE[url] = ""
@@ -144,7 +129,7 @@
 swh:1:rev:d6b7c96c3eb29b9244ece0c046d3f372ff432d04 swh:1:rev:c01efc669f09508b55e
     hist_nodes = set(
         map(
-            parse_swhid,
+            CoreSWHID.from_string,
             [edge.split(" ")[1] for edge in history.strip().split("\n")],
         )
     )
@@ -163,7 +148,7 @@ def generate_origin_archive_web_api(url: str):
 
 
 for entry in ALL_ENTRIES:
-    swhid = parse_swhid(entry)
+    swhid = CoreSWHID.from_string(entry)
     generate_archive_web_api(swhid, recursive=True)
     generate_archive_graph_api(swhid)
diff --git a/swh/fuse/tests/test_cache.py b/swh/fuse/tests/test_cache.py
index b230709b1fe8226a8fe78c55fa434dea47ed6482..c650fe7ad35b979836720ef0bd74d4dd103e8518 100644
--- a/swh/fuse/tests/test_cache.py
+++ b/swh/fuse/tests/test_cache.py
@@ -6,7 +6,8 @@
 import os
 
 from swh.fuse.tests.data.config import REGULAR_FILE
-from swh.model.identifiers import parse_swhid
+from swh.model.hashutil import hash_to_hex
+from swh.model.identifiers import CoreSWHID
 
 
 def test_cache_artifact(fuse_mntdir):
@@ -14,8 +15,11 @@ def test_cache_artifact(fuse_mntdir):
 
     (fuse_mntdir / "archive" / REGULAR_FILE).is_file()
 
-    swhid = parse_swhid(REGULAR_FILE)
-    assert os.listdir(fuse_mntdir / "cache") == [swhid.object_id[:2], "origin"]
+    swhid = CoreSWHID.from_string(REGULAR_FILE)
+    assert os.listdir(fuse_mntdir / "cache") == [
+        hash_to_hex(swhid.object_id)[:2],
+        "origin",
+    ]
 
 
 def test_purge_artifact(fuse_mntdir):
@@ -27,7 +31,7 @@ def test_purge_artifact(fuse_mntdir):
     (fuse_mntdir / "archive" / REGULAR_FILE).is_file()
     assert os.listdir(fuse_mntdir / "cache") != DEFAULT_CACHE_CONTENT
 
     # ... and remove it from cache
-    swhid = parse_swhid(REGULAR_FILE)
-    os.unlink(fuse_mntdir / "cache" / swhid.object_id[:2] / str(swhid))
+    swhid = CoreSWHID.from_string(REGULAR_FILE)
+    os.unlink(fuse_mntdir / "cache" / hash_to_hex(swhid.object_id)[:2] / str(swhid))
 
     assert os.listdir(fuse_mntdir / "cache") == DEFAULT_CACHE_CONTENT
diff --git a/swh/fuse/tests/test_mountpoint.py b/swh/fuse/tests/test_mountpoint.py
index 09c0e7532c6c14421d3f27b89bbe5bf5092da5ce..3a259d2b8b6eca702b0ce6e406f902a2f306dac8 100644
--- a/swh/fuse/tests/test_mountpoint.py
+++ b/swh/fuse/tests/test_mountpoint.py
@@ -6,7 +6,7 @@
 import os
 
 from swh.fuse.tests.data.config import ORIGIN_URL_ENCODED, REGULAR_FILE
-from swh.model.identifiers import parse_swhid
+from swh.model.identifiers import CoreSWHID
 
 
 def test_mountpoint(fuse_mntdir):
@@ -21,7 +21,7 @@ def test_on_the_fly_mounting(fuse_mntdir):
     assert os.listdir(fuse_mntdir / "origin") == []
     assert (fuse_mntdir / "origin" / ORIGIN_URL_ENCODED).is_dir()
 
-    sharded_dir = parse_swhid(REGULAR_FILE).object_id[:2]
+    sharded_dir = CoreSWHID.from_string(REGULAR_FILE).object_id.hex()[:2]
     assert os.listdir(fuse_mntdir / "cache") == [sharded_dir, "origin"]
     assert os.listdir(fuse_mntdir / "cache" / sharded_dir) == [
         REGULAR_FILE,
diff --git a/swh/fuse/tests/test_revision.py b/swh/fuse/tests/test_revision.py
index d88215162c22eb25d5b1e088c43de2d81b13df1e..b610b886e33d2569f69471fdfa074f78fb83c988 100644
--- a/swh/fuse/tests/test_revision.py
+++ b/swh/fuse/tests/test_revision.py
@@ -12,7 +12,8 @@ from swh.fuse.tests.common import (
     get_data_from_web_archive,
 )
 from swh.fuse.tests.data.config import REV_SMALL_HISTORY, ROOT_DIR, ROOT_REV
-from swh.model.identifiers import parse_swhid
+from swh.model.hashutil import hash_to_hex
+from swh.model.identifiers import CoreSWHID
 
 
 def test_access_meta(fuse_mntdir):
@@ -53,12 +54,12 @@ def test_list_history(fuse_mntdir):
     # Only keep second node in the edge because first node is redundant
     # information or the root node (hence not an ancestor)
     expected = set(
-        map(parse_swhid, [edge.split(" ")[1] for edge in history.split("\n")])
+        map(CoreSWHID.from_string, [edge.split(" ")[1] for edge in history.split("\n")])
     )
 
     dir_by_hash = dir_path / "by-hash"
     for swhid in expected:
-        depth1 = swhid.object_id[:2]
+        depth1 = hash_to_hex(swhid.object_id)[:2]
         depth2 = str(swhid)
         assert (dir_by_hash / depth1).exists()
         assert depth2 in (os.listdir(dir_by_hash / depth1))
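
Note on the API migration applied above (editorial summary, not part of the
patch): the whole diff is the mechanical fallout of three changes in the
swh.model identifier API. A minimal sketch, assuming a swh.model version that
still exposes these names from swh.model.identifiers (newer releases move
them to swh.model.swhids):

    from swh.model.hashutil import hash_to_bytes, hash_to_hex
    from swh.model.identifiers import CoreSWHID, ObjectType

    # 1. parse_swhid() is replaced by CoreSWHID.from_string()
    swhid = CoreSWHID.from_string(
        "swh:1:rev:d6b7c96c3eb29b9244ece0c046d3f372ff432d04"
    )

    # 2. object_type is now an ObjectType enum member instead of a str
    #    constant; .name.lower() recovers the old lowercase spelling that
    #    callers such as typify_json() still expect
    assert swhid.object_type == ObjectType.REVISION
    assert swhid.object_type.name.lower() == "revision"

    # 3. object_id is now bytes instead of a hex str, hence the
    #    hash_to_hex()/hash_to_bytes() (or bytes.hex()) conversions at every
    #    sharding and URL-building site in the patch
    assert hash_to_hex(swhid.object_id)[:2] == "d6"
    assert swhid.object_id.startswith(hash_to_bytes("d6"))

    # str() still renders the canonical form used as the cache key
    assert str(swhid) == "swh:1:rev:d6b7c96c3eb29b9244ece0c046d3f372ff432d04"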