diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
index 7316d8cd7600bf3399e2544244ea465143735302..5d456c95a3a5a4ca8cad7eb6a8bcaa618cea6fe2 100644
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -393,6 +393,28 @@ class ElasticSearch:
         else:
             return document["_source"]
 
+    def origin_delete(self, url: str) -> bool:
+        """Remove the document indexed for the given origin URL.
+
+        The document id is derived from ``url`` via
+        ``hash_to_hex(model.Origin(url=url).id)``.
+
+        Args:
+            url: URL of the origin to remove from the index.
+
+        Returns:
+            True if the backend deleted the document, False if it raised
+            ``NotFoundError`` because no such document exists.
+        """
+        origin_id = hash_to_hex(model.Origin(url=url).id)
+        try:
+            # NOTE(review): the delete goes through the read alias; confirm
+            # that alias always resolves to the index that accepts writes.
+            self._backend.delete(index=self._get_origin_read_alias(), id=origin_id)
+        except NotFoundError:
+            return False
+        return True
+
     @timed
     def origin_search(
         self,
diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
index 3a9f4d22b9c79b93c8ef15e3f6dcb2b415b0f0df..9cf70d58dff7dc0dc13b786a2eb4dc0e487a635f 100644
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -554,6 +554,33 @@ class InMemorySearch:
         else:
             return {k: v for (k, v) in document.items() if k != "_url_tokens"}
 
+    def origin_delete(self, url: str) -> bool:
+        """Remove the document stored for the given origin URL.
+
+        Args:
+            url: URL of the origin to remove.
+
+        Returns:
+            True if the document was removed, False if no document was
+            stored for this origin.
+
+        Raises:
+            AssertionError: if the origin was in ``_origins`` but missing
+                from ``_origin_ids``, i.e. the two structures were out of sync.
+        """
+        origin_id = hash_to_hex(model.Origin(url=url).id)
+        try:
+            del self._origins[origin_id]
+        except KeyError:
+            return False
+        try:
+            self._origin_ids.remove(origin_id)
+        except ValueError as exc:
+            # Raised explicitly rather than via ``assert False`` so the check
+            # is not stripped when Python runs with -O.
+            raise AssertionError("this should not have happened") from exc
+        return True
+
     def visit_types_count(self) -> Counter:
         hits = self._get_hits()
         return Counter(chain(*[hit.get("visit_types", []) for hit in hits]))
diff --git a/swh/search/interface.py b/swh/search/interface.py
index 2c38665be7283b7433427a8c20b1c869b5a86d64..a2f25bcbeb78ab93f4be3f8127498212f8af15fd 100644
--- a/swh/search/interface.py
+++ b/swh/search/interface.py
@@ -142,6 +142,18 @@ class SearchInterface(Protocol):
         Order is arbitrary; unknown origins are not returned.
         """
 
+    @remote_api_endpoint("origin/delete")
+    def origin_delete(self, url: str) -> bool:
+        """Remove the document associated with the given origin URL.
+
+        Args:
+            url: URL of the origin whose document should be removed.
+
+        Returns:
+            True if the document was removed, False if it could not be found.
+        """
+        ...
+
     @remote_api_endpoint("visit_types_count")
     def visit_types_count(self) -> Counter:
         """Returns origin counts per visit type (git, hg, svn, ...)."""
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
index afd2a8d27be8e68cca0761a927eb47b0f0d882bc..e0b2b40731217e2a8656dec65e559b2e7d8c6149 100644
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -1503,6 +1503,29 @@ class CommonSearchTest:
         assert self.search.origin_get(origin3["url"]) == expanded_origins[2]
         assert self.search.origin_get("http://origin4") is None
 
+    def test_origin_delete(self):
+        """Deleting an origin removes only it; a second delete returns False."""
+        origin1 = {"url": "http://one", "visit_types": ["git"], "has_visits": True}
+        origin2 = {"url": "http://two", "visit_types": ["git"], "has_visits": True}
+        self.search.origin_update([origin1, origin2])
+        self.search.flush()
+
+        assert self.search.origin_get("http://one")["url"] == "http://one"
+        assert len(self.search.origin_search(url_pattern="one", limit=1).results) == 1
+        deleted = self.search.origin_delete("http://one")
+        assert deleted, "origin not found"
+        self.search.flush()
+
+        assert self.search.origin_get("http://one") is None
+        assert len(self.search.origin_search(url_pattern="one", limit=1).results) == 0
+        assert self.search.origin_get("http://two")["url"] == "http://two"
+
+        # Ensure idempotency
+        deleted = self.search.origin_delete("http://one")
+        self.search.flush()
+        assert not deleted, "origin deleted twice‽"
+        assert self.search.origin_get("http://one") is None
+
     def test_visit_types_count(self):
         assert self.search.visit_types_count() == Counter()
 