Skip to content
Snippets Groups Projects
Verified Commit 40adc8c2 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

deposit: Add utils to parse metadata provenance from atom entry file

Related to T3677
parent e1549dc0
No related branches found
No related tags found
Loading
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:schema="https://schema.org/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<title>Awesome stuff</title>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<author>ssedud</author>
<swh:deposit>
<swh:metadata-provenance>
<schema:url>{url}</schema:url>
</swh:metadata-provenance>
</swh:deposit>
</entry>
# Copyright (C) 2018-2020 The Software Heritage developers
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -196,11 +196,12 @@ def test_parse_swh_reference_origin(xml_with_origin_reference):
@pytest.fixture
def xml_with_empty_reference():
def xml_swh_deposit_template():
xml_data = """<?xml version="1.0"?>
<entry xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<entry xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:schema="https://schema.org/">
<swh:deposit>
{swh_reference}
{swh_deposit}
</swh:deposit>
</entry>
"""
......@@ -216,8 +217,8 @@ def xml_with_empty_reference():
"""<swh:reference><swh:object swhid="" /></swh:reference>""",
],
)
def test_parse_swh_reference_empty(xml_with_empty_reference, xml_ref):
xml_body = xml_with_empty_reference.format(swh_reference=xml_ref)
def test_parse_swh_reference_empty(xml_swh_deposit_template, xml_ref):
xml_body = xml_swh_deposit_template.format(swh_deposit=xml_ref)
metadata = utils.parse_xml(xml_body)
assert utils.parse_swh_reference(metadata) is None
......@@ -271,3 +272,32 @@ def test_parse_swh_reference_invalid_swhid(invalid_swhid, xml_with_swhid):
with pytest.raises(ValidationError):
utils.parse_swh_reference(metadata)
@pytest.mark.parametrize(
    "xml_ref",
    [
        "",
        "<swh:metadata-provenance></swh:metadata-provenance>",
        "<swh:metadata-provenance><schema:url /></swh:metadata-provenance>",
    ],
)
def test_parse_swh_metatada_provenance_empty(xml_swh_deposit_template, xml_ref):
    """A missing or empty metadata-provenance node must be parsed as None."""
    # Inject the (possibly empty) provenance snippet into the deposit template.
    parsed = utils.parse_xml(xml_swh_deposit_template.format(swh_deposit=xml_ref))

    assert utils.parse_swh_metadata_provenance(parsed) is None
@pytest.fixture
def xml_with_metadata_provenance(atom_dataset):
    """Return the ``atom_dataset`` entry holding a metadata-provenance sample."""
    dataset_key = "entry-data-with-metadata-provenance"
    return atom_dataset[dataset_key]
def test_parse_swh_metadata_provenance2(xml_with_metadata_provenance):
    """The url set in the metadata-provenance node must be returned unchanged."""
    expected_url = "https://url.org/metadata/url"
    # The fixture is a template with a ``{url}`` placeholder to fill in.
    parsed = utils.parse_xml(xml_with_metadata_provenance.format(url=expected_url))

    assert utils.parse_swh_metadata_provenance(parsed) == expected_url
# Copyright (C) 2018-2020 The Software Heritage developers
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -25,6 +25,7 @@ def parse_xml(stream, encoding="utf-8"):
"https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta",
"http://purl.org/net/sword/terms/": "sword",
"https://www.softwareheritage.org/schema/2018/deposit": "swh",
"https://schema.org/": "schema",
}
data = xmltodict.parse(
......@@ -146,6 +147,42 @@ ALLOWED_QUALIFIERS_NODE_TYPE = (
)
def parse_swh_metadata_provenance(metadata: Dict) -> Optional[str]:
    """Extract the metadata-provenance url from a parsed Atom document.

    The url is read from ``swh:deposit`` > ``swh:metadata-provenance`` >
    ``schema:url``:

    .. code-block:: xml

       <swh:deposit>
         <swh:metadata-provenance>
           <schema:url>https://url.org/metadata/url</schema:url>
         </swh:metadata-provenance>
       </swh:deposit>

    Args:
        metadata: result of parsing an Atom document with :func:`parse_xml`

    Returns:
        The metadata provenance url if any, None otherwise. None is also
        returned when the ``swh:deposit`` or ``swh:metadata-provenance`` node
        is missing, empty, or is not a mapping (e.g. a text-only or repeated
        element).

    """
    swh_deposit = metadata.get("swh:deposit")
    # A text-only or repeated element is not parsed into a dict; there is
    # nothing to look up in it, so treat it like a missing node instead of
    # failing with an AttributeError on ``.get``.
    if not isinstance(swh_deposit, dict):
        return None

    swh_metadata_provenance = swh_deposit.get("swh:metadata-provenance")
    if not isinstance(swh_metadata_provenance, dict):
        return None

    return swh_metadata_provenance.get("schema:url")
def parse_swh_reference(metadata: Dict,) -> Optional[Union[QualifiedSWHID, str]]:
"""Parse swh reference within the metadata dict (or origin) reference if found,
None otherwise.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment