From e42deb6d0086a57e88b65e6c6d7b087d807e78bb Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Fri, 8 Nov 2024 12:46:40 +0100 Subject: [PATCH] bibtex: Robustify code extracting year and month from date Previous implementation could lead to errors when an invalid date is present in source codemeta. --- swh/indexer/bibtex.py | 13 +++++++++---- swh/indexer/tests/test_bibtex.py | 23 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/swh/indexer/bibtex.py b/swh/indexer/bibtex.py index 446ce4d..891cd11 100644 --- a/swh/indexer/bibtex.py +++ b/swh/indexer/bibtex.py @@ -11,6 +11,7 @@ import sys from typing import Any, Dict, List, Optional import uuid +import iso8601 from pybtex.database import Entry, Person from pybtex.database.output.bibtex import Writer from pybtex.plugin import register_plugin @@ -130,10 +131,14 @@ def codemeta_to_bibtex( fields["date"] = date break if "date" in fields: - (fields["year"], month_number, _) = fields["date"].split("-") - fields["month"] = ( - f"{MACRO_PREFIX}:{calendar.month_abbr[int(month_number)].lower()}" - ) + try: + parsed_date = iso8601.parse_date(fields["date"]) + fields["year"] = str(parsed_date.year) + fields["month"] = ( + f"{MACRO_PREFIX}:{calendar.month_abbr[parsed_date.month].lower()}" + ) + except iso8601.ParseError: + pass # identifier, doi, hal_id entry_key = None diff --git a/swh/indexer/tests/test_bibtex.py b/swh/indexer/tests/test_bibtex.py index fa6fbf1..a820861 100644 --- a/swh/indexer/tests/test_bibtex.py +++ b/swh/indexer/tests/test_bibtex.py @@ -276,6 +276,29 @@ def test_affiliation(): ) +def test_invalid_date(): + assert codemeta_to_bibtex( + { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "author": {"name": "Jane Doe"}, + "name": "Example Software", + "url": "http://example.org/", + "datePublished": "TBD", + "license": "https://spdx.org/licenses/Apache-2.0", + } + ) == textwrap.dedent( + """\ + @software{REPLACEME, + author = "Doe, Jane", + license = "Apache-2.0", + date = "TBD", + title = "Example Software", + url = "http://example.org/" + } + """ + ) + + def test_cff_empty(): assert cff_to_bibtex("") == textwrap.dedent( """\ -- GitLab