Skip to content
Snippets Groups Projects
Commit f9ca60c1 authored by Hélène Jonin
Browse files

Add origin extrinsic metadata endpoint

parent b9bd14e8
No related tags found
1 merge request: !1285 "Update origin intrinsic metadata route and add origin extrinsic metadata route"
......@@ -930,6 +930,16 @@ def test_api_origin_intrinsic_metadata(api_client, origin):
assert rv.data[ORIGIN_METADATA_KEY] == ORIGIN_METADATA_VALUE
def test_api_origin_extrinsic_metadata(api_client, origin):
    """Query the origin extrinsic metadata endpoint and check its payload.

    The endpoint is requested with the origin URL as ``origin_url`` query
    parameter; the response must have status 200 and map
    ``ORIGIN_METADATA_KEY`` to ``ORIGIN_METADATA_VALUE``.
    """
    query_params = {"origin_url": origin["url"]}
    endpoint_url = reverse("api-origin-extrinsic-metadata", query_params=query_params)

    response = check_api_get_responses(api_client, endpoint_url, status_code=200)
    payload = response.data

    assert ORIGIN_METADATA_KEY in payload
    assert payload[ORIGIN_METADATA_KEY] == ORIGIN_METADATA_VALUE
def test_api_origin_metadata_search_invalid(api_client, mocker):
mock_idx_storage = mocker.patch("swh.web.utils.archive.idx_storage")
url = reverse("api-1-origin-metadata-search")
......
......@@ -536,3 +536,40 @@ def api_origin_intrinsic_metadata(request: Request):
enrich_fn=enrich_origin,
request=request,
)
@api_route(r"/extrinsic-metadata/origin/", "api-origin-extrinsic-metadata")
@api_doc("/extrinsic-metadata/origin/", category="Metadata")
@format_docstring()
def api_origin_extrinsic_metadata(request: Request):
    """
    .. http:get:: /api/1/extrinsic-metadata/origin/

        Get extrinsic metadata of a software origin (as a JSON-LD/CodeMeta dictionary).

        :query str origin_url: URL of the origin to get extrinsic metadata for
            (mandatory)

        :>json string ???: extrinsic metadata field of the origin

        {common_headers}

        :statuscode 200: no error
        :statuscode 404: requested origin cannot be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`extrinsic-metadata/origin/?origin_url=https://github.com/node-red/node-red-nodegen`
    """
    # The origin is identified by a query parameter rather than a path
    # segment, so its presence has to be checked explicitly here.
    # NOTE(review): BadInputExc is presumably turned into an HTTP 400 by the
    # API layer -- confirm and document that status code above if so.
    origin_url = request.GET.get("origin_url")
    if origin_url is None:
        raise BadInputExc("An origin URL must be provided as query parameter.")

    # Only the exact URL is looked up (lookup_similar_urls=False), so the
    # trailing-slash variants are not tried for this endpoint.
    return api_lookup(
        archive.lookup_origin_extrinsic_metadata,
        origin_url,
        lookup_similar_urls=False,
        notfound_msg=f"Origin with url {origin_url} not found",
        enrich_fn=enrich_origin,
        request=request,
    )
......@@ -16,7 +16,10 @@ from swh.counters import get_counters
from swh.indexer.fossology_license import FossologyLicenseIndexer
from swh.indexer.mimetype import MimetypeIndexer
from swh.indexer.storage import get_indexer_storage
from swh.indexer.storage.model import OriginIntrinsicMetadataRow
from swh.indexer.storage.model import (
OriginExtrinsicMetadataRow,
OriginIntrinsicMetadataRow,
)
from swh.loader.git.from_disk import GitLoaderFromArchive
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.model.model import (
......@@ -314,18 +317,30 @@ def _init_tests_data():
)
)
if b"master" in branch_name:
# Add some origin intrinsic metadata for tests
# Add some origin intrinsic and extrinsic metadata for tests
metadata = common_metadata
metadata.update(origin.get("metadata", {}))
revision = storage.revision_get([branch_data.target])[0]
origin_metadata = OriginIntrinsicMetadataRow(
origin_intrinsic_metadata = OriginIntrinsicMetadataRow(
id=origin["url"],
from_directory=revision.directory,
indexer_configuration_id=idx_tool["id"],
metadata=metadata,
mappings=[],
)
idx_storage.origin_intrinsic_metadata_add([origin_metadata])
idx_storage.origin_intrinsic_metadata_add(
[origin_intrinsic_metadata]
)
origin_extrinsic_metadata = OriginExtrinsicMetadataRow(
id=origin["url"],
from_remd_id=revision.directory,
indexer_configuration_id=idx_tool["id"],
metadata=metadata,
mappings=[],
)
idx_storage.origin_extrinsic_metadata_add(
[origin_extrinsic_metadata]
)
search.origin_update([{"url": origin["url"], "jsonld": metadata}])
ORIGIN_MASTER_REVISION[origin["url"]] = hash_to_hex(
......
......@@ -447,6 +447,38 @@ def lookup_origin_intrinsic_metadata(
return result
def lookup_origin_extrinsic_metadata(
    origin_url: str, lookup_similar_urls: bool = True
) -> Dict[str, Any]:
    """Return the extrinsic metadata indexed for the origin with the given URL.

    Args:
        origin_url: origin url
        lookup_similar_urls: if :const:`True`, lookup origin with and
            without trailing slash in its URL

    Raises:
        NotFoundExc: when the origin is not found

    Returns:
        The ``metadata`` of the entry picked by ``_first_element`` among the
        indexer storage results, or an empty dict when there is no such entry.
    """
    resolved_origin = lookup_origin(
        origin_url, lookup_similar_urls=lookup_similar_urls
    )
    origin_urls = [resolved_origin["url"]]

    # Make sure the resolved origin is known to the storage before querying
    # the indexer storage.
    if not storage.origin_get(origin_urls)[0]:
        raise NotFoundExc(f"Origin with url {origin_url} not found!")

    entries = idx_storage.origin_extrinsic_metadata_get(origin_urls)
    first_entry = _first_element(entries)
    return first_entry.metadata if first_entry else {}
def _to_sha1_bin(sha1_hex):
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
sha1_hex, ["sha1"], "Only sha1_git is supported." # HACK: sha1_git really
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment