From accabc731b105bde1df6940611b8e41da20e6cc3 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Mon, 27 Nov 2023 15:18:07 +0100
Subject: [PATCH] Remove support for external_identifier

It was deprecated years ago, and no client used it in staging or prod
since 2022-04-11
---
 docs/specs/spec-loading.rst                   |  1 -
 swh/deposit/api/checks.py                     | 14 +++-
 swh/deposit/api/common.py                     | 29 +------
 .../api/test_collection_add_to_origin.py      | 24 ------
 .../tests/api/test_collection_post_atom.py    | 84 -------------------
 .../tests/api/test_collection_reuse_slug.py   | 40 ---------
 .../tests/api/test_deposit_update_atom.py     | 37 --------
 ...ith-both-add-to-origin-and-external-id.xml | 14 ----
 ...-external-identifier-and-create-origin.xml | 14 ----
 .../atom/error-with-external-identifier.xml   |  7 --
 .../1_private_test_999_meta                   |  2 -
 11 files changed, 11 insertions(+), 255 deletions(-)
 delete mode 100644 swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml
 delete mode 100644 swh/deposit/tests/data/atom/error-with-external-identifier-and-create-origin.xml
 delete mode 100644 swh/deposit/tests/data/atom/error-with-external-identifier.xml

diff --git a/docs/specs/spec-loading.rst b/docs/specs/spec-loading.rst
index c1420b4c..17140079 100644
--- a/docs/specs/spec-loading.rst
+++ b/docs/specs/spec-loading.rst
@@ -316,7 +316,6 @@ It contains the same fields as any revision object; in particular:
             "codemeta:url": "https://hal.archives-ouvertes.fr/hal-01883795",
             "codemeta:version": "1",
             "committer": "David Picard",
-            "external_identifier": "hal-01883795",
             "id": "hal-01883795"
         },
         "parents": [],
diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py
index 3b942a5c..8237ff59 100644
--- a/swh/deposit/api/checks.py
+++ b/swh/deposit/api/checks.py
@@ -61,9 +61,6 @@ ATOM_ELEMENTS = [
     "summary",
     "title",
     "updated",
-    # we used to recommend this, so we still need to support it so we don't break
-    # existing clients
-    "external_identifier",
 ]
 
 # from https://github.com/codemeta/codemeta/blob/2.0/codemeta.jsonld
@@ -350,7 +347,16 @@ def check_metadata(metadata: ElementTree.Element) -> Tuple[bool, Optional[Dict]]
         if element.tag.startswith("{http://www.w3.org/2005/Atom}"):
             _, local_name = element.tag.split("}", 1)
             if local_name not in ATOM_ELEMENTS:
-                if local_name in CODEMETA2_CONTEXT:
+                if local_name == "external_identifier":
+                    detail.append(
+                        {
+                            "fields": [local_name],
+                            "summary": "<external_identifier> is not supported anymore, "
+                            "<swh:create_origin> or <swh:add_to_origin> should be used "
+                            "instead.",
+                        }
+                    )
+                elif local_name in CODEMETA2_CONTEXT:
                     # Probably confused the two namespaces, display a nicer error
                     detail.append(
                         {
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
index 7e970d05..7f82365e 100644
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -29,11 +29,7 @@ from swh.deposit.api.converters import convert_status_detail
 from swh.deposit.auth import HasDepositPermission, KeycloakBasicAuthentication
 from swh.deposit.models import DEPOSIT_METADATA_ONLY, Deposit
 from swh.deposit.parsers import parse_xml
-from swh.deposit.utils import (
-    NAMESPACES,
-    compute_metadata_context,
-    parse_swh_metadata_provenance,
-)
+from swh.deposit.utils import compute_metadata_context, parse_swh_metadata_provenance
 from swh.model import hashutil
 from swh.model.model import (
     MetadataAuthority,
@@ -945,29 +941,6 @@ class APIBase(APIConfig, APIView, metaclass=ABCMeta):
             )
             deposit.origin_url = origin_url
 
-        external_identifier_element = metadata.find(
-            "atom:external_identifier", namespaces=NAMESPACES
-        )
-        if external_identifier_element is not None:
-            # Deprecated tag.
-            # When clients stopped using it, this should raise an error
-            # unconditionally
-
-            if deposit.origin_url:
-                raise DepositError(
-                    BAD_REQUEST,
-                    "<external_identifier> is deprecated, you should only use "
-                    "<swh:create_origin> and <swh:add_to_origin> from now on.",
-                )
-
-            if headers.slug and external_identifier_element.text != headers.slug:
-                raise DepositError(
-                    BAD_REQUEST,
-                    "The <external_identifier> tag and Slug header are deprecated, "
-                    "<swh:create_origin> or <swh:add_to_origin> "
-                    "should be used instead.",
-                )
-
     def _empty_post(
         self,
         request: Request,
diff --git a/swh/deposit/tests/api/test_collection_add_to_origin.py b/swh/deposit/tests/api/test_collection_add_to_origin.py
index 666bedc6..91cef83f 100644
--- a/swh/deposit/tests/api/test_collection_add_to_origin.py
+++ b/swh/deposit/tests/api/test_collection_add_to_origin.py
@@ -109,30 +109,6 @@ def test_add_deposit_add_to_wrong_origin(
     assert b"must start with" in response.content
 
 
-def test_add_deposit_with_add_to_origin_and_external_identifier(
-    authenticated_client,
-    deposit_collection,
-    completed_deposit,
-    atom_dataset,
-    deposit_user,
-):
-    """Posting deposit with <swh:add_to_origin> creates a new deposit with parent"""
-    # given multiple deposit already loaded
-    origin_url = deposit_user.provider_url + completed_deposit.external_id
-
-    # adding a new deposit with the same external id as a completed deposit
-    # creates the parenting chain
-    response = post_atom(
-        authenticated_client,
-        reverse(COL_IRI, args=[deposit_collection.name]),
-        data=atom_dataset["entry-data-with-both-add-to-origin-and-external-id"]
-        % origin_url,
-    )
-
-    assert response.status_code == status.HTTP_400_BAD_REQUEST
-    assert b"&lt;external_identifier&gt; is deprecated" in response.content
-
-
 def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix(
     authenticated_client, deposit_collection, atom_dataset, deposit_user
 ):
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
index a613b4ec..7b2b0f2c 100644
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -286,90 +286,6 @@ def test_post_deposit_atom_no_origin_url_nor_slug_header(
     assert deposit.status == DEPOSIT_STATUS_DEPOSITED
 
 
-def test_post_deposit_atom_with_slug_and_external_identifier(
-    authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
-):
-    """Even though <external_identifier> is deprecated, it should still be
-    allowed when it matches the slug, so that we don't break existing clients
-
-    """
-    url = reverse(COL_IRI, args=[deposit_collection.name])
-
-    slug = str(uuid.uuid4())
-
-    # when
-    response = post_atom(
-        authenticated_client,
-        url,
-        data=atom_dataset["error-with-external-identifier"] % slug,
-        HTTP_IN_PROGRESS="false",
-        HTTP_SLUG=slug,
-    )
-
-    assert response.status_code == status.HTTP_201_CREATED
-    response_content = ElementTree.fromstring(response.content)
-    deposit_id = int(response_content.findtext("swh:deposit_id", namespaces=NAMESPACES))
-
-    deposit = Deposit.objects.get(pk=deposit_id)
-    assert deposit.collection == deposit_collection
-    assert deposit.origin_url == deposit_user.provider_url + slug
-    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
-
-
-def test_post_deposit_atom_with_mismatched_slug_and_external_identifier(
-    authenticated_client, deposit_collection, atom_dataset
-):
-    """Posting an atom entry with mismatched slug header and external_identifier
-    should return a 400
-
-    """
-    external_id = "foobar"
-    url = reverse(COL_IRI, args=[deposit_collection.name])
-
-    # when
-    response = post_atom(
-        authenticated_client,
-        url,
-        data=atom_dataset["error-with-external-identifier"] % external_id,
-        HTTP_IN_PROGRESS="false",
-        HTTP_SLUG="something",
-    )
-
-    assert (
-        b"The &lt;external_identifier&gt; tag and Slug header are deprecated"
-        in response.content
-    )
-    assert response.status_code == status.HTTP_400_BAD_REQUEST
-
-
-def test_post_deposit_atom_with_create_origin_and_external_identifier(
-    authenticated_client, deposit_collection, atom_dataset, deposit_user
-):
-    """<atom:external_identifier> was deprecated before <swh:create_origin>
-    was introduced, clients should get an error when trying to use both
-
-    """
-    external_id = "foobar"
-    origin_url = deposit_user.provider_url + external_id
-    url = reverse(COL_IRI, args=[deposit_collection.name])
-
-    document = atom_dataset["error-with-external-identifier-and-create-origin"].format(
-        external_id=external_id,
-        url=origin_url,
-    )
-
-    # when
-    response = post_atom(
-        authenticated_client,
-        url,
-        data=document,
-        HTTP_IN_PROGRESS="false",
-    )
-
-    assert b"&lt;external_identifier&gt; is deprecated" in response.content
-    assert response.status_code == status.HTTP_400_BAD_REQUEST
-
-
 def test_post_deposit_atom_with_create_origin_and_reference(
     authenticated_client, deposit_collection, atom_dataset, deposit_user
 ):
diff --git a/swh/deposit/tests/api/test_collection_reuse_slug.py b/swh/deposit/tests/api/test_collection_reuse_slug.py
index ed6fe1c0..6968f51b 100644
--- a/swh/deposit/tests/api/test_collection_reuse_slug.py
+++ b/swh/deposit/tests/api/test_collection_reuse_slug.py
@@ -145,46 +145,6 @@ def test_add_deposit_when_done_makes_new_deposit_with_parent_old_one(
     assert new_deposit.origin_url == origin_url
 
 
-def test_add_deposit_with_external_identifier(
-    authenticated_client,
-    deposit_collection,
-    completed_deposit,
-    atom_dataset,
-    deposit_user,
-):
-    """Even though <external_identifier> is deprecated, it should still be
-    allowed when it matches the slug, so that we don't break existing clients
-
-    """
-    # given multiple deposit already loaded
-    deposit = completed_deposit
-    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
-    origin_url = deposit_user.provider_url + deposit.external_id
-
-    # adding a new deposit with the same external id as a completed deposit
-    # creates the parenting chain
-    response = post_atom(
-        authenticated_client,
-        reverse(COL_IRI, args=[deposit_collection.name]),
-        data=atom_dataset["error-with-external-identifier"] % deposit.external_id,
-        HTTP_SLUG=deposit.external_id,
-    )
-
-    assert response.status_code == status.HTTP_201_CREATED
-    response_content = parse_xml(response.content)
-    deposit_id = int(response_content.findtext("swh:deposit_id", namespaces=NAMESPACES))
-
-    assert deposit_id != deposit.id
-
-    new_deposit = Deposit.objects.get(pk=deposit_id)
-    assert deposit.collection == new_deposit.collection
-    assert deposit.origin_url == origin_url
-
-    assert new_deposit != deposit
-    assert new_deposit.parent == deposit
-    assert new_deposit.origin_url == origin_url
-
-
 def test_add_deposit_external_id_conflict_no_parent(
     authenticated_client,
     deposit_collection,
diff --git a/swh/deposit/tests/api/test_deposit_update_atom.py b/swh/deposit/tests/api/test_deposit_update_atom.py
index 4809d982..d8ac723e 100644
--- a/swh/deposit/tests/api/test_deposit_update_atom.py
+++ b/swh/deposit/tests/api/test_deposit_update_atom.py
@@ -523,43 +523,6 @@ def test_put_update_metadata_done_deposit_failure_functional_checks(
     assert msg in response.content
 
 
-def test_put_atom_with_create_origin_and_external_identifier(
-    authenticated_client, deposit_collection, atom_dataset, deposit_user
-):
-    """<atom:external_identifier> was deprecated before <swh:create_origin>
-    was introduced, clients should get an error when trying to use both
-
-    """
-    external_id = "foobar"
-    origin_url = deposit_user.provider_url + external_id
-    url = reverse(COL_IRI, args=[deposit_collection.name])
-
-    response = post_atom(
-        authenticated_client,
-        url,
-        data=atom_dataset["entry-data0"] % origin_url,
-        HTTP_IN_PROGRESS="true",
-    )
-
-    assert response.status_code == status.HTTP_201_CREATED
-    response_content = parse_xml(response.content)
-
-    edit_iri = response_content.find(
-        "atom:link[@rel='edit']", namespaces=NAMESPACES
-    ).attrib["href"]
-
-    # when
-    response = put_atom(
-        authenticated_client,
-        edit_iri,
-        data=atom_dataset["error-with-external-identifier"] % external_id,
-        HTTP_IN_PROGRESS="false",
-    )
-
-    assert b"&lt;external_identifier&gt; is deprecated" in response.content
-    assert response.status_code == status.HTTP_400_BAD_REQUEST
-
-
 def test_put_atom_with_create_origin_and_reference(
     authenticated_client, deposit_collection, atom_dataset, deposit_user
 ):
diff --git a/swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml b/swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml
deleted file mode 100644
index 9c188f84..00000000
--- a/swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml
+++ /dev/null
@@ -1,14 +0,0 @@
-<?xml version="1.0"?>
-<entry xmlns="http://www.w3.org/2005/Atom"
-         xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
-         xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
-    <title>Awesome Compiler</title>
-    <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
-    <author>dudess</author>
-    <external_identifier>foo</external_identifier>
-    <swh:deposit>
-      <swh:add_to_origin>
-        <swh:origin url="%s" />
-      </swh:add_to_origin>
-    </swh:deposit>
-</entry>
diff --git a/swh/deposit/tests/data/atom/error-with-external-identifier-and-create-origin.xml b/swh/deposit/tests/data/atom/error-with-external-identifier-and-create-origin.xml
deleted file mode 100644
index 4b6436f3..00000000
--- a/swh/deposit/tests/data/atom/error-with-external-identifier-and-create-origin.xml
+++ /dev/null
@@ -1,14 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<entry xmlns="http://www.w3.org/2005/Atom"
-       xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
-       xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
-  <title>Composing a Web of Audio Applications</title>
-  <id>hal-01243065</id>
-  <external_identifier>{external_id}</external_identifier>
-  <author>someone</author>
-  <swh:deposit>
-    <swh:create_origin>
-      <swh:origin url="{url}" />
-    </swh:create_origin>
-  </swh:deposit>
-</entry>
diff --git a/swh/deposit/tests/data/atom/error-with-external-identifier.xml b/swh/deposit/tests/data/atom/error-with-external-identifier.xml
deleted file mode 100644
index 7c9703f5..00000000
--- a/swh/deposit/tests/data/atom/error-with-external-identifier.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<entry xmlns="http://www.w3.org/2005/Atom" xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
-  <title>Composing a Web of Audio Applications</title>
-  <id>hal-01243065</id>
-  <author>someone</author>
-  <external_identifier>%s</external_identifier>
-</entry>
diff --git a/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta b/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta
index 01419e3d..34a5804e 100644
--- a/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta
+++ b/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta
@@ -13,7 +13,6 @@
                 "no one"
             ],
             "codemeta:dateCreated": "2017-10-07T15:17:08Z",
-            "external_identifier": "some-external-id",
             "url": "https://hal-test.archives-ouvertes.fr/some-external-id"
         },
         "provider": {
@@ -57,7 +56,6 @@
                        "another one",
                        "no one"],
             "codemeta:dateCreated": "2017-10-07T15:17:08Z",
-            "external_identifier": "some-external-id",
             "url": "https://hal-test.archives-ouvertes.fr/some-external-id"
         },
         "synthetic": "true",
-- 
GitLab