diff --git a/requirements.txt b/requirements.txt index be4c6fc62a4f652471e8d534595a81042f925fdf..bd20daf62ad04d5531016c9c0c63c100feac3cae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -beautifulsoup4 +beautifulsoup4 >= 4.13.3 breezy >= 3.3.1, < 3.3.5 # use versions with available binary wheels dateparser dulwich diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py index 5f879de85eb5cb174d8513e3ad19778231da0b79..199b655cd2b7717f4de2c878751a541a5f1aba21 100644 --- a/swh/lister/maven/tests/test_lister.py +++ b/swh/lister/maven/tests/test_lister.py @@ -356,8 +356,9 @@ def test_maven_lister_null_mtime(swh_scheduler, requests_mock, maven_index_null_ def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock): - """should continue listing when failing to decode pom file.""" - # Test failure of pom parsing by reencoding a UTF-8 pom file to a not expected one + """should successfully parse a pom file with unexpected encoding + (beautifulsoup4 >= 4.13).""" + # Test pom parsing by reencoding a UTF-8 pom file to a not expected one requests_mock.get( URL_POM_1, content=requests.get(URL_POM_1).content.decode("utf-8").encode("utf-32"), @@ -367,10 +368,14 @@ def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock): lister.run() - # If the maven_index_full step succeeded but not the pom parsing step, - # then we get only one maven-jar origin and one git origin. + # we should get one maven-jar origin and two git origins. scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results - assert len(scheduler_origins) == 2 + assert len(scheduler_origins) == 3 + + # git origin parsed from pom file with unexpected encoding + assert ("https://github.com/aldialimucaj/sprova4j", "git") in [ + (o.url, o.visit_type) for o in scheduler_origins + ] def test_maven_list_pom_multi_byte_encoding(swh_scheduler, requests_mock, datadir):