Skip to content
Snippets Groups Projects
Commit 358ee084 authored by vlorentz
Browse files

Use compact URIs for ForgeFed and ActivityStreams

It makes resulting documents (usually) shorter, and tests more readable.
parent d41f26ee
No related branches found
No related tags found
1 merge request: !341 "Use compact URIs for ForgeFed and ActivityStreams"
......@@ -9,6 +9,7 @@ import itertools
import json
import os.path
import re
from typing import Any, List
from pyld import jsonld
......@@ -120,11 +121,18 @@ def _document_loader(url, options=None):
raise Exception(url)
def compact(doc):
    """Compact ``doc`` against the CodeMeta context.

    Thin wrapper around `pyld.jsonld.compact` that always uses
    ``CODEMETA_CONTEXT_URL`` as the context and resolves remote documents
    through ``_document_loader``.
    """
    loader_options = {"documentLoader": _document_loader}
    return jsonld.compact(doc, CODEMETA_CONTEXT_URL, options=loader_options)
def compact(doc, forgefed: bool):
    """Compact ``doc`` with `pyld.jsonld.compact`, using the CodeMeta context.

    Args:
        doc: the JSON-LD document handed to `pyld.jsonld.compact`.
        forgefed: when true, the ``as`` (ActivityStreams) and ``forge``
            (ForgeFed) prefixes are appended to the context so that URIs in
            these namespaces are written in compact form. This is typically
            used for extrinsic metadata documents, which frequently use
            properties from these namespaces.

    Returns:
        Whatever `pyld.jsonld.compact` returns for ``doc`` and the assembled
        context.
    """
    # The prefix map is only built (and its constants only read) on request.
    extra_prefixes = (
        [{"as": ACTIVITYSTREAMS_URI, "forge": FORGEFED_URI}] if forgefed else []
    )
    contexts: List[Any] = [CODEMETA_CONTEXT_URL, *extra_prefixes]
    loader_options = {"documentLoader": _document_loader}
    return jsonld.compact(doc, contexts, options=loader_options)
def expand(doc):
......@@ -202,4 +210,7 @@ def merge_documents(documents):
elif value not in merged_document[key]:
merged_document[key].append(value)
return compact(merged_document)
# XXX: we should set forgefed=True when merging extrinsic-metadata documents.
# However, this function is only used to merge multiple files of the same
# directory (which is only for intrinsic-metadata), so it is not an issue for now.
return compact(merged_document, forgefed=False)
......@@ -62,7 +62,7 @@ class BaseMapping:
raise NotImplementedError(f"{self.__class__.__name__}.translate")
def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
return compact(metadata)
raise NotImplementedError(f"{self.__class__.__name__}.normalize_translation")
class BaseExtrinsicMapping(BaseMapping):
......@@ -82,6 +82,9 @@ class BaseExtrinsicMapping(BaseMapping):
"""
raise NotImplementedError(f"{cls.__name__}.extrinsic_metadata_formats")
def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Return ``metadata`` compacted with ``forgefed=True``."""
    compacted = compact(metadata, forgefed=True)
    return compacted
class BaseIntrinsicMapping(BaseMapping):
"""Base class for intrinsic-metadata mappings to inherit from
......@@ -99,6 +102,9 @@ class BaseIntrinsicMapping(BaseMapping):
"""
raise NotImplementedError(f"{cls.__name__}.detect_metadata_files")
def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Return ``metadata`` compacted with ``forgefed=False``."""
    compacted = compact(metadata, forgefed=False)
    return compacted
class SingleFileIntrinsicMapping(BaseIntrinsicMapping):
"""Base class for all intrinsic metadata mappings that use a single file as input."""
......
......@@ -5,6 +5,14 @@
from swh.indexer.metadata_dictionary import MAPPINGS
# Expected ``@context`` of the translated documents asserted in this module:
# the CodeMeta context URL followed by the ``as`` and ``forge`` prefix map.
CONTEXT = [
    "https://doi.org/10.5063/schema/codemeta-2.0",
    {
        "as": "https://www.w3.org/ns/activitystreams#",
        "forge": "https://forgefed.org/ns#",
    },
]
def test_compute_metadata_none():
"""
......@@ -111,11 +119,11 @@ def test_compute_metadata_github():
"""
result = MAPPINGS["GitHubMapping"]().translate(content)
assert result == {
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
"@context": CONTEXT,
"type": "https://forgefed.org/ns#Repository",
"https://forgefed.org/ns#forks": {
"https://www.w3.org/ns/activitystreams#totalItems": 1,
"type": "https://www.w3.org/ns/activitystreams#OrderedCollection",
"forge:forks": {
"as:totalItems": 1,
"type": "as:OrderedCollection",
},
"license": "https://spdx.org/licenses/GPL-3.0",
"name": "SoftwareHeritage/swh-indexer",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment