From a3d66736a416c7fd13186a22cf3afbad639edbd6 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Mon, 10 Feb 2025 14:26:42 +0100
Subject: [PATCH] maven: Update test that is now failing since beautifulsoup4
 4.13

The latest beautifulsoup4 release (4.13) seems to have fixed issues
related to unexpected encodings in XML files, so a test that
previously passed is now failing.

Update that test to check that the origin URL and visit type can be
successfully extracted from a POM file with an unexpected encoding.
---
 requirements.txt                      |  2 +-
 swh/lister/maven/tests/test_lister.py | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index be4c6fc6..bd20daf6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-beautifulsoup4
+beautifulsoup4 >= 4.13.3
 breezy >= 3.3.1, < 3.3.5 # use versions with available binary wheels
 dateparser
 dulwich
diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py
index 5f879de8..199b655c 100644
--- a/swh/lister/maven/tests/test_lister.py
+++ b/swh/lister/maven/tests/test_lister.py
@@ -356,8 +356,9 @@ def test_maven_lister_null_mtime(swh_scheduler, requests_mock, maven_index_null_
 
 
 def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock):
-    """should continue listing when failing to decode pom file."""
-    # Test failure of pom parsing by reencoding a UTF-8 pom file to a not expected one
+    """should successfully parse a pom file with unexpected encoding
+    (beautifulsoup4 >= 4.13)."""
+    # Test pom parsing by re-encoding a UTF-8 pom file to an unexpected encoding
     requests_mock.get(
         URL_POM_1,
         content=requests.get(URL_POM_1).content.decode("utf-8").encode("utf-32"),
@@ -367,10 +368,14 @@ def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock):
 
     lister.run()
 
-    # If the maven_index_full step succeeded but not the pom parsing step,
-    # then we get only one maven-jar origin and one git origin.
+    # we should get one maven-jar origin and two git origins.
     scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
-    assert len(scheduler_origins) == 2
+    assert len(scheduler_origins) == 3
+
+    # git origin parsed from pom file with unexpected encoding
+    assert ("https://github.com/aldialimucaj/sprova4j", "git") in [
+        (o.url, o.visit_type) for o in scheduler_origins
+    ]
 
 
 def test_maven_list_pom_multi_byte_encoding(swh_scheduler, requests_mock, datadir):
-- 
GitLab