Skip to content
Snippets Groups Projects
Commit ad92ca25 authored by Hélène Jonin, committed by vlorentz
Browse files

Add cff to bibtex converter

parent 46dcb5b2
No related branches found
No related tags found
1 merge request: !514 Add cff to bibtex converter
Pipeline #10179 passed
......@@ -6,14 +6,16 @@
import collections
import json
import sys
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional
import uuid
from pybtex.database import Entry, Person
import rdflib
from swh.indexer.codemeta import compact, expand
from swh.indexer.metadata_dictionary.cff import CffMapping
from swh.indexer.namespaces import RDF, SCHEMA, SPDX_LICENSES
from swh.model.swhids import ObjectType, QualifiedSWHID
TMP_ROOT_URI_PREFIX = "https://www.softwareheritage.org/schema/2022/indexer/tmp-node/"
"""IRI used for `skolemization <https://www.w3.org/TR/rdf11-concepts/#section-skolemization>`_;
......@@ -21,7 +23,9 @@ it is not used outside :func:`codemeta_to_bibtex`.
"""
def codemeta_to_bibtex(doc: Dict[str, Any]) -> str:
def codemeta_to_bibtex(
doc: Dict[str, Any], swhid: Optional[QualifiedSWHID] = None
) -> str:
doc = compact(doc, False)
identifiers = []
......@@ -54,15 +58,23 @@ def codemeta_to_bibtex(doc: Dict[str, Any]) -> str:
fields: Dict[str, Any] = {}
def add_person(persons: List[Person], person_id: rdflib.term.Node) -> None:
person = Person()
for _, _, name in g.triples((person_id, SCHEMA.name, None)):
if (person_id, RDF.type, SCHEMA.Organization) in g:
# prevent interpreting the name as "Firstname Lastname" and reformatting
# it to "Lastname, Firstname"
person = Person(last=name)
person.last_names.append(name)
else:
person = Person(name)
if person not in persons:
persons.append(person)
for _, _, given_name in g.triples((person_id, SCHEMA.givenName, None)):
person.first_names.append(given_name)
for _, _, family_name in g.triples((person_id, SCHEMA.familyName, None)):
person.last_names.append(family_name)
if str(person) and person not in persons:
persons.append(person)
def add_affiliations(person: rdflib.term.Node) -> None:
for _, _, organization in g.triples((person, SCHEMA.affiliation, None)):
......@@ -160,7 +172,20 @@ def codemeta_to_bibtex(doc: Dict[str, Any]) -> str:
for _, _, version in g.triples((id_, SCHEMA.version, None)):
fields["version"] = version
entry_type = "softwareversion" if "version" in fields else "software"
# entry_type
if swhid:
fields["swhid"] = str(swhid)
if swhid.object_type == ObjectType.SNAPSHOT:
entry_type = "software"
elif swhid.object_type == ObjectType.CONTENT:
entry_type = "codefragment"
else:
entry_type = "softwareversion"
elif "version" in fields:
entry_type = "softwareversion"
else:
entry_type = "software"
entry = Entry(
entry_type,
persons=persons,
......@@ -171,6 +196,13 @@ def codemeta_to_bibtex(doc: Dict[str, Any]) -> str:
return entry.to_string(bib_format="bibtex")
def cff_to_bibtex(content: str, swhid: Optional[QualifiedSWHID] = None) -> str:
    """Convert the text of a Citation File Format document to BibTeX.

    The text is encoded as UTF-8 and handed to :class:`CffMapping` for
    translation; the translated document is then rendered by
    :func:`codemeta_to_bibtex`.

    Args:
        content: text of the CFF document
        swhid: optional SWHID, forwarded unchanged to
            :func:`codemeta_to_bibtex`

    Returns:
        the string returned by :func:`codemeta_to_bibtex`
    """
    translated = CffMapping().translate(raw_content=content.encode("utf-8"))
    # The mapping may yield None; an empty document is rendered in that case
    # instead of passing None on to the converter.
    return codemeta_to_bibtex({} if translated is None else translated, swhid)
if __name__ == "__main__":
for filename in sys.argv[1:]:
if filename == "-":
......
......@@ -25,9 +25,9 @@ class CffMapping(YamlMapping, SingleFileIntrinsicMapping):
name = "cff"
filename = b"CITATION.cff"
mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
string_fields = ["keywords", "license", "abstract", "version", "doi"]
string_fields = ["title", "keywords", "license", "abstract", "version", "doi"]
date_fields = ["date-released"]
uri_fields = ["repository-code"]
uri_fields = ["url", "repository-code"]
def _translate_author(self, graph: Graph, author: dict) -> rdflib.term.Node:
node: rdflib.term.Node
......
......@@ -85,6 +85,7 @@ RIS, schema.org, CodeMeta, and .zenodo.json.""",
"identifier": "https://doi.org/10.5281/zenodo.1162057",
"license": "https://spdx.org/licenses/Apache-2.0",
"version": "1.4.0-alpha0",
"name": "cffconvert",
}
assert expected == result
......
......@@ -7,7 +7,8 @@ import textwrap
import pytest
from swh.indexer.bibtex import codemeta_to_bibtex
from swh.indexer.bibtex import cff_to_bibtex, codemeta_to_bibtex
from swh.model.swhids import QualifiedSWHID
def test_empty():
......@@ -273,3 +274,101 @@ def test_affiliation():
}
"""
)
def test_cff_empty():
    """An empty CFF document converts to a ``@software`` entry without fields."""
    expected = textwrap.dedent(
        """\
@software{REPLACEME
}
"""
    )
    assert cff_to_bibtex("") == expected
def test_cff_invalid():
    """The input ``"foo"`` gives the same field-less ``@software`` entry."""
    expected = textwrap.dedent(
        """\
@software{REPLACEME
}
"""
    )
    assert cff_to_bibtex("foo") == expected
def test_cff_minimal():
    """A small CFF document (one author, title, release date, URL) is converted."""
    # NOTE(review): no line in this view of the file carries leading
    # whitespace, so any indentation that originally lived inside the two
    # literals below cannot be recovered here; they are reproduced exactly as
    # shown. Confirm them against the repository before relying on this test.
    cff_document = """
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: Druskat
given-names: Stephan
title: "My Research Software"
date-released: 2021-08-11
url: "http://example.org/"
"""
    expected = textwrap.dedent(
        """\
@software{REPLACEME,
author = "Druskat, Stephan",
date = "2021-08-11",
year = "2021",
month = "08",
title = "My Research Software",
url = "http://example.org/"
}
"""
    )
    assert cff_to_bibtex(cff_document) == expected
def test_swhid_type_snp():
    """A snapshot (``snp``) SWHID gives a ``@software`` entry with a ``swhid`` field."""
    document = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
    }
    swhid = QualifiedSWHID.from_string(
        "swh:1:snp:da39a3ee5e6b4b0d3255bfef95601890afd80709"
    )
    # NOTE(review): this view of the file shows no leading whitespace on any
    # line, so indentation originally inside the literal below may be
    # missing; it is reproduced exactly as shown. Confirm against the repository.
    expected = textwrap.dedent(
        """\
@software{REPLACEME,
swhid = "swh:1:snp:da39a3ee5e6b4b0d3255bfef95601890afd80709"
}
"""
    )
    assert codemeta_to_bibtex(document, swhid) == expected
def test_swhid_type_rev():
    """A revision (``rev``) SWHID gives a ``@softwareversion`` entry."""
    document = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
    }
    swhid = QualifiedSWHID.from_string(
        "swh:1:rev:5b909292bcfe6099d726c0b5194165c72f93b767"
    )
    # NOTE(review): this view of the file shows no leading whitespace on any
    # line, so indentation originally inside the literal below may be
    # missing; it is reproduced exactly as shown. Confirm against the repository.
    expected = textwrap.dedent(
        """\
@softwareversion{REPLACEME,
swhid = "swh:1:rev:5b909292bcfe6099d726c0b5194165c72f93b767"
}
"""
    )
    assert codemeta_to_bibtex(document, swhid) == expected
def test_swhid_type_cnt():
    """A content (``cnt``) SWHID with a ``lines`` qualifier gives ``@codefragment``."""
    document = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
    }
    swhid = QualifiedSWHID.from_string(
        "swh:1:cnt:5b909292bcfe6099d726c0b5194165c72f93b767;lines=5-10"
    )
    # NOTE(review): this view of the file shows no leading whitespace on any
    # line, so indentation originally inside the literal below may be
    # missing; it is reproduced exactly as shown. Confirm against the repository.
    expected = textwrap.dedent(
        """\
@codefragment{REPLACEME,
swhid = "swh:1:cnt:5b909292bcfe6099d726c0b5194165c72f93b767;lines=5-10"
}
"""
    )
    assert codemeta_to_bibtex(document, swhid) == expected
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment