Skip to content
Snippets Groups Projects
Commit 2dcbedf3 authored by John Ericson, committed by Phabricator Migration user
Browse files

Make archive.lookup_missing_hashes output bytes

All things being equal, I think the bytes representation is better, and in
this case it works well for existing callers too.
parent e6a8303e
No related branches found
No related tags found
No related merge requests found
......@@ -110,12 +110,11 @@ def api_swhid_known(request):
swhids_by_type = group_swhids(swhids)
# search for hashes not present in the storage
missing_hashes = {
k: set(map(hash_to_bytes, archive.lookup_missing_hashes({k: v})))
for k, v in swhids_by_type.items()
k: set(archive.lookup_missing_hashes({k: v})) for k, v in swhids_by_type.items()
}
for swhid in swhids:
if swhid.object_id not in missing_hashes[swhid.object_type]:
if hash_to_bytes(swhid.object_id) not in missing_hashes[swhid.object_type]:
response[str(swhid)]["known"] = True
return response
......@@ -1406,7 +1406,7 @@ def lookup_object(object_type: ObjectType, object_id: str) -> Dict[str, Any]:
raise ValueError(f"Unexpected object type variant: {object_type}")
def lookup_missing_hashes(grouped_swhids: Dict[str, List[bytes]]) -> Set[str]:
def lookup_missing_hashes(grouped_swhids: Dict[str, List[bytes]]) -> Set[bytes]:
"""Lookup missing Software Heritage persistent identifier hash, using
batch processing.
......@@ -1415,7 +1415,7 @@ def lookup_missing_hashes(grouped_swhids: Dict[str, List[bytes]]) -> Set[str]:
keys: object types
values: object hashes
Returns:
A set(hexadecimal) of the hashes not found in the storage
A set(bytes) of the hashes not found in the storage
"""
missing_hashes = []
......@@ -1431,9 +1431,7 @@ def lookup_missing_hashes(grouped_swhids: Dict[str, List[bytes]]) -> Set[str]:
elif obj_type == ObjectType.SNAPSHOT:
missing_hashes.append(storage.snapshot_missing(obj_ids))
missing = set(
map(lambda x: hashutil.hash_to_hex(x), itertools.chain(*missing_hashes))
)
missing = set(itertools.chain(*missing_hashes))
return missing
......
......@@ -31,7 +31,7 @@ from swh.web.common import archive
from swh.web.common.exc import BadInputExc, NotFoundExc
from swh.web.common.typing import OriginInfo, PagedResult
from swh.web.tests.conftest import ctags_json_missing, fossology_missing
from swh.web.tests.data import random_content, random_sha1
from swh.web.tests.data import random_content, random_sha1, random_sha1_bytes
from swh.web.tests.strategies import new_origin, new_revision, visit_dates
......@@ -941,18 +941,18 @@ def test_lookup_invalid_objects(invalid_sha1):
def test_lookup_missing_hashes_non_present():
missing_cnt = random_sha1()
missing_dir = random_sha1()
missing_rev = random_sha1()
missing_rel = random_sha1()
missing_snp = random_sha1()
missing_cnt = random_sha1_bytes()
missing_dir = random_sha1_bytes()
missing_rev = random_sha1_bytes()
missing_rel = random_sha1_bytes()
missing_snp = random_sha1_bytes()
grouped_swhids = {
ObjectType.CONTENT: [hash_to_bytes(missing_cnt)],
ObjectType.DIRECTORY: [hash_to_bytes(missing_dir)],
ObjectType.REVISION: [hash_to_bytes(missing_rev)],
ObjectType.RELEASE: [hash_to_bytes(missing_rel)],
ObjectType.SNAPSHOT: [hash_to_bytes(missing_snp)],
ObjectType.CONTENT: [missing_cnt],
ObjectType.DIRECTORY: [missing_dir],
ObjectType.REVISION: [missing_rev],
ObjectType.RELEASE: [missing_rel],
ObjectType.SNAPSHOT: [missing_snp],
}
actual_result = archive.lookup_missing_hashes(grouped_swhids)
......@@ -967,16 +967,16 @@ def test_lookup_missing_hashes_non_present():
def test_lookup_missing_hashes_some_present(content, directory):
missing_rev = random_sha1()
missing_rel = random_sha1()
missing_snp = random_sha1()
missing_rev = random_sha1_bytes()
missing_rel = random_sha1_bytes()
missing_snp = random_sha1_bytes()
grouped_swhids = {
ObjectType.CONTENT: [hash_to_bytes(content["sha1_git"])],
ObjectType.DIRECTORY: [hash_to_bytes(directory)],
ObjectType.REVISION: [hash_to_bytes(missing_rev)],
ObjectType.RELEASE: [hash_to_bytes(missing_rel)],
ObjectType.SNAPSHOT: [hash_to_bytes(missing_snp)],
ObjectType.REVISION: [missing_rev],
ObjectType.RELEASE: [missing_rel],
ObjectType.SNAPSHOT: [missing_snp],
}
actual_result = archive.lookup_missing_hashes(grouped_swhids)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment