From a3d66736a416c7fd13186a22cf3afbad639edbd6 Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Mon, 10 Feb 2025 14:26:42 +0100 Subject: [PATCH] maven: Update test that is now failing since beautifulsoup4 4.13 Latest beautifulsoup4 release (4.13) seems to have fixed issues related to unexpected encodings in XML files so a test that was passing previously is now failing. Update that test to check origin URL and visit type can be successfully extracted from a POM file with unexpected encoding. --- requirements.txt | 2 +- swh/lister/maven/tests/test_lister.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index be4c6fc6..bd20daf6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -beautifulsoup4 +beautifulsoup4 >= 4.13.3 breezy >= 3.3.1, < 3.3.5 # use versions with available binary wheels dateparser dulwich diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py index 5f879de8..199b655c 100644 --- a/swh/lister/maven/tests/test_lister.py +++ b/swh/lister/maven/tests/test_lister.py @@ -356,8 +356,9 @@ def test_maven_lister_null_mtime(swh_scheduler, requests_mock, maven_index_null_ def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock): - """should continue listing when failing to decode pom file.""" - # Test failure of pom parsing by reencoding a UTF-8 pom file to a not expected one + """should successfully parse a pom file with unexpected encoding + (beautifulsoup4 >= 4.13).""" + # Test pom parsing by reencoding a UTF-8 pom file to a not expected one requests_mock.get( URL_POM_1, content=requests.get(URL_POM_1).content.decode("utf-8").encode("utf-32"), @@ -367,10 +368,14 @@ def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock): lister.run() - # If the maven_index_full step succeeded but not the pom parsing step, - # then we get only one maven-jar origin and one git origin. + # we should get one maven-jar origin and two git origins. scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results - assert len(scheduler_origins) == 2 + assert len(scheduler_origins) == 3 + + # git origin parsed from pom file with unexpected encoding + assert ("https://github.com/aldialimucaj/sprova4j", "git") in [ + (o.url, o.visit_type) for o in scheduler_origins + ] def test_maven_list_pom_multi_byte_encoding(swh_scheduler, requests_mock, datadir): -- GitLab