From ad92ca25c0ecbe155ddbb7153fded36e701fbd2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne=20Jonin?= <helene.jonin@gmail.com> Date: Fri, 2 Aug 2024 14:02:02 +0000 Subject: [PATCH] Add cff to bibtex converter --- swh/indexer/bibtex.py | 44 ++++++-- swh/indexer/metadata_dictionary/cff.py | 4 +- .../tests/metadata_dictionary/test_cff.py | 1 + swh/indexer/tests/test_bibtex.py | 101 +++++++++++++++++- 4 files changed, 141 insertions(+), 9 deletions(-) diff --git a/swh/indexer/bibtex.py b/swh/indexer/bibtex.py index dba65dcb..a232ee49 100644 --- a/swh/indexer/bibtex.py +++ b/swh/indexer/bibtex.py @@ -6,14 +6,16 @@ import collections import json import sys -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional import uuid from pybtex.database import Entry, Person import rdflib from swh.indexer.codemeta import compact, expand +from swh.indexer.metadata_dictionary.cff import CffMapping from swh.indexer.namespaces import RDF, SCHEMA, SPDX_LICENSES +from swh.model.swhids import ObjectType, QualifiedSWHID TMP_ROOT_URI_PREFIX = "https://www.softwareheritage.org/schema/2022/indexer/tmp-node/" """IRI used for `skolemization <https://www.w3.org/TR/rdf11-concepts/#section-skolemization>`_; @@ -21,7 +23,9 @@ it is not used outside :func:`codemeta_to_bibtex`. """ -def codemeta_to_bibtex(doc: Dict[str, Any]) -> str: +def codemeta_to_bibtex( + doc: Dict[str, Any], swhid: Optional[QualifiedSWHID] = None +) -> str: doc = compact(doc, False) identifiers = [] @@ -54,15 +58,23 @@ def codemeta_to_bibtex(doc: Dict[str, Any]) -> str: fields: Dict[str, Any] = {} def add_person(persons: List[Person], person_id: rdflib.term.Node) -> None: + person = Person() for _, _, name in g.triples((person_id, SCHEMA.name, None)): if (person_id, RDF.type, SCHEMA.Organization) in g: # prevent interpreting the name as "Firstname Lastname" and reformatting # it to "Lastname, Firstname" - person = Person(last=name) + person.last_names.append(name) else: person = Person(name) - if person not in persons: - persons.append(person) + + for _, _, given_name in g.triples((person_id, SCHEMA.givenName, None)): + person.first_names.append(given_name) + + for _, _, family_name in g.triples((person_id, SCHEMA.familyName, None)): + person.last_names.append(family_name) + + if str(person) and person not in persons: + persons.append(person) def add_affiliations(person: rdflib.term.Node) -> None: for _, _, organization in g.triples((person, SCHEMA.affiliation, None)): @@ -160,7 +172,20 @@ def codemeta_to_bibtex(doc: Dict[str, Any]) -> str: for _, _, version in g.triples((id_, SCHEMA.version, None)): fields["version"] = version - entry_type = "softwareversion" if "version" in fields else "software" + # entry_type + if swhid: + fields["swhid"] = str(swhid) + if swhid.object_type == ObjectType.SNAPSHOT: + entry_type = "software" + elif swhid.object_type == ObjectType.CONTENT: + entry_type = "codefragment" + else: + entry_type = "softwareversion" + elif "version" in fields: + entry_type = "softwareversion" + else: + entry_type = "software" + entry = Entry( entry_type, persons=persons, @@ -171,6 +196,13 @@ def codemeta_to_bibtex(doc: Dict[str, Any]) -> str: return entry.to_string(bib_format="bibtex") +def cff_to_bibtex(content: str, swhid: Optional[QualifiedSWHID] = None) -> str: + codemeta = CffMapping().translate(raw_content=content.encode("utf-8")) + if codemeta is None: + codemeta = {} + return codemeta_to_bibtex(codemeta, swhid) + + if __name__ == "__main__": for filename in sys.argv[1:]: if filename == "-": diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py index 0d730e88..93d32b21 100644 --- a/swh/indexer/metadata_dictionary/cff.py +++ b/swh/indexer/metadata_dictionary/cff.py @@ -25,9 +25,9 @@ class CffMapping(YamlMapping, SingleFileIntrinsicMapping): name = "cff" filename = b"CITATION.cff" mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"] - string_fields = ["keywords", "license", "abstract", "version", "doi"] + string_fields = ["title", "keywords", "license", "abstract", "version", "doi"] date_fields = ["date-released"] - uri_fields = ["repository-code"] + uri_fields = ["url", "repository-code"] def _translate_author(self, graph: Graph, author: dict) -> rdflib.term.Node: node: rdflib.term.Node diff --git a/swh/indexer/tests/metadata_dictionary/test_cff.py b/swh/indexer/tests/metadata_dictionary/test_cff.py index fb50ba5d..35964faf 100644 --- a/swh/indexer/tests/metadata_dictionary/test_cff.py +++ b/swh/indexer/tests/metadata_dictionary/test_cff.py @@ -85,6 +85,7 @@ RIS, schema.org, CodeMeta, and .zenodo.json.""", "identifier": "https://doi.org/10.5281/zenodo.1162057", "license": "https://spdx.org/licenses/Apache-2.0", "version": "1.4.0-alpha0", + "name": "cffconvert", } assert expected == result diff --git a/swh/indexer/tests/test_bibtex.py b/swh/indexer/tests/test_bibtex.py index e0220d71..c2f4a861 100644 --- a/swh/indexer/tests/test_bibtex.py +++ b/swh/indexer/tests/test_bibtex.py @@ -7,7 +7,8 @@ import textwrap import pytest -from swh.indexer.bibtex import codemeta_to_bibtex +from swh.indexer.bibtex import cff_to_bibtex, codemeta_to_bibtex +from swh.model.swhids import QualifiedSWHID def test_empty(): @@ -273,3 +274,101 @@ def test_affiliation(): } """ ) + + +def test_cff_empty(): + assert cff_to_bibtex("") == textwrap.dedent( + """\ + @software{REPLACEME + } + """ + ) + + +def test_cff_invalid(): + assert cff_to_bibtex("foo") == textwrap.dedent( + """\ + @software{REPLACEME + } + """ + ) + + +def test_cff_minimal(): + assert ( + cff_to_bibtex( + """ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: + - family-names: Druskat + given-names: Stephan +title: "My Research Software" +date-released: 2021-08-11 +url: "http://example.org/" + """ + ) + == textwrap.dedent( + """\ + @software{REPLACEME, + author = "Druskat, Stephan", + date = "2021-08-11", + year = "2021", + month = "08", + title = "My Research Software", + url = "http://example.org/" + } + """ + ) + ) + + +def test_swhid_type_snp(): + assert codemeta_to_bibtex( + { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + }, + QualifiedSWHID.from_string( + "swh:1:snp:da39a3ee5e6b4b0d3255bfef95601890afd80709" + ), + ) == textwrap.dedent( + """\ + @software{REPLACEME, + swhid = "swh:1:snp:da39a3ee5e6b4b0d3255bfef95601890afd80709" + } + """ + ) + + +def test_swhid_type_rev(): + assert codemeta_to_bibtex( + { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + }, + QualifiedSWHID.from_string( + "swh:1:rev:5b909292bcfe6099d726c0b5194165c72f93b767" + ), + ) == textwrap.dedent( + """\ + @softwareversion{REPLACEME, + swhid = "swh:1:rev:5b909292bcfe6099d726c0b5194165c72f93b767" + } + """ + ) + + +def test_swhid_type_cnt(): + assert codemeta_to_bibtex( + { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + }, + QualifiedSWHID.from_string( + "swh:1:cnt:5b909292bcfe6099d726c0b5194165c72f93b767;lines=5-10" + ), + ) == textwrap.dedent( + """\ + @codefragment{REPLACEME, + swhid = "swh:1:cnt:5b909292bcfe6099d726c0b5194165c72f93b767;lines=5-10" + } + """ + ) -- GitLab