Skip to content

maven: Prevent UnicodeDecodeError when processing pom file

Pass the raw bytes of the pom file content to xmltodict.parse and let it decode them according to the encoding declared in the pom file.

>>> import xmltodict
>>> xmltodict.expat.ExpatError
<class 'xml.parsers.expat.ExpatError'>
>>> import requests
>>> response = requests.get("https://repo1.maven.org/maven2/net/sourceforge/floggy/floggy/1.4.0/floggy-1.4.0.pom")
>>> response.content
b'<?xml version="1.0" encoding="ISO-8859-1"?>\n<!--\n\n    Copyright (c) 2006-2011 Floggy Open Source Group. All rights reserved.\n\n    Licensed under the Apache License, Version 2.0 (the "License");\n    you may not use this file except in compliance with the License.\n    You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\n    Unless required by applicable law or agreed to in writing, software\n    distributed under the License is distributed on an "AS IS" BASIS,\n    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n    See the License for the specific language governing permissions and\n    limitations under the License.\n\n-->\n<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">\n\t<modelVersion>4.0.0</modelVersion>\n\t<groupId>net.sourceforge.floggy</groupId>\n\t<artifactId>floggy</artifactId>\n\t<version>1.4.0</version>\n\t<name>Floggy Framework\'s for JME</name>\n\t<url>http://floggy.sourceforge.net</url>\n\t<description>A collection of frameworks for JME</description>\n\t<inceptionYear>2006</inceptionYear>\n\t<packaging>pom</packaging>\n\t<organization>\n\t\t<name>Floggy Open Source Group</name>\n\t\t<url>http://floggy.org/</url>\n\t</organization>\n\t<licenses>\n\t\t<license>\n\t\t\t<distribution>manual</distribution>\n\t\t\t<name>Apache License 
2.0</name>\n\t\t\t<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>\n\t\t</license>\n\t</licenses>\n\t<scm>\n\t\t<connection>scm:svn:https://floggy.svn.sourceforge.net/svnroot/floggy/tags/floggy-1.4.0</connection>\n\t\t<developerConnection>scm:svn:https://floggy.svn.sourceforge.net/svnroot/floggy/tags/floggy-1.4.0</developerConnection>\n\t\t<url>http://floggy.svn.sourceforge.net/viewvc/floggy/tags/floggy-1.4.0</url>\n\t</scm>\n\t<repositories>\n\t\t<repository>\n\t\t\t<id>sonatype-snapshot</id>\n\t\t\t<url>http://oss.sonatype.org/content/repositories/sourceforge-snapshots</url>\n\t\t\t<snapshots>\n\t\t\t\t<enabled>true</enabled>\n\t\t\t</snapshots>\n\t\t\t<releases>\n\t\t\t\t<enabled>false</enabled>\n\t\t\t</releases>\n\t\t</repository>\n\t</repositories>\n\t<pluginRepositories>\n\t\t<pluginRepository>\n\t\t\t<id>sonatype-plugin-snapshot</id>\n\t\t\t<url>http://oss.sonatype.org/content/repositories/sourceforge-snapshots</url>\n\t\t\t<snapshots>\n\t\t\t\t<enabled>true</enabled>\n\t\t\t</snapshots>\n\t\t\t<releases>\n\t\t\t\t<enabled>false</enabled>\n\t\t\t</releases>\n\t\t</pluginRepository>\n\t</pluginRepositories>\n\t<issueManagement>\n\t\t<system>sourceforge</system>\n\t\t<url>http://sourceforge.net/tracker/?group_id=139426</url>\n\t</issueManagement>\n\t<ciManagement>\n\t\t<system>parabuild</system>\n\t\t<url>http://parabuild.viewtier.com:8080/parabuild/index.htm?cid=29a4&amp;view=detailed&amp;buildid=12739&amp;displaygroupid=-1</url>\n\t</ciManagement>\n\t<distributionManagement>\n\t\t<site>\n\t\t\t<id>sourceforge</id>\n\t\t\t<name>SourceForge.net</name>\n\t\t\t<url>scp://shell.sourceforge.net:/home/groups/f/fl/floggy/htdocs/modules/${project.artifactId}/${project.version}</url>\n\t\t</site>\n\t\t<repository>\n\t\t\t<id>sonatype</id>\n\t\t\t<name>Sonatype.org</name>\n\t\t\t<url>http://oss.sonatype.org/service/local/staging/deploy/maven2</url>\n\t\t</repository>\n\t\t<snapshotRepository>\n\t\t\t<id>sonatype</id>\n\t\t\t<name>Sonatype.org</name>\n\t\t\t<ur
l>http://oss.sonatype.org/content/repositories/sourceforge-snapshots</url>\n\t\t</snapshotRepository>\n\t</distributionManagement>\n\t<mailingLists>\n\t\t<mailingList>\n\t\t\t<name>Developer\'s mailing list</name>\n\t\t\t<archive>http://sourceforge.net/mailarchive/forum.php?forum=floggy-developer</archive>\n\t\t\t<post>floggy-developer@lists.sourceforge.net</post>\n\t\t\t<subscribe>https://lists.sourceforge.net/lists/listinfo/floggy-developer</subscribe>\n\t\t\t<unsubscribe>https://lists.sourceforge.net/lists/listinfo/floggy-developer</unsubscribe>\n\t\t</mailingList>\n\t\t<mailingList>\n\t\t\t<name>User\'s mailing list</name>\n\t\t\t<archive>http://sourceforge.net/mailarchive/forum.php?forum=floggy-user</archive>\n\t\t\t<post>floggy-user@lists.sourceforge.net</post>\n\t\t\t<subscribe>https://lists.sourceforge.net/lists/listinfo/floggy-user</subscribe>\n\t\t\t<unsubscribe>https://lists.sourceforge.net/lists/listinfo/floggy-user</unsubscribe>\n\t\t</mailingList>\n\t</mailingLists>\n\t<developers>\n\t\t<developer>\n\t\t\t<name>Thiago Le\xe3o Moreira</name>\n\t\t\t<id>thiagolm</id>\n\t\t\t<email>thiago.moreira@floggy.org</email>\n\t\t\t<organization>Floggy Open Source Group</organization>\n\t\t\t<roles>\n\t\t\t\t<role>Java Developer</role>\n\t\t\t\t<role>Release Manager</role>\n\t\t\t</roles>\n\t\t\t<timezone>-3</timezone>\n\t\t</developer>\n\t\t<developer>\n\t\t\t<name>Thiago Rossato</name>\n\t\t\t<id>thiagorossato</id>\n\t\t\t<email>thiago.rossato@floggy.org</email>\n\t\t\t<organization>Floggy Open Source Group</organization>\n\t\t\t<roles>\n\t\t\t\t<role>Java Developer</role>\n\t\t\t</roles>\n\t\t\t<timezone>-3</timezone>\n\t\t</developer>\n\t\t<developer>\n\t\t\t<name>Priscila Tavares Lugon</name>\n\t\t\t<id>priscilalugon</id>\n\t\t\t<email>priscila.lugon@floggy.org</email>\n\t\t\t<organization>Floggy Open Source Group</organization>\n\t\t\t<roles>\n\t\t\t\t<role>Java 
Developer</role>\n\t\t\t</roles>\n\t\t\t<timezone>-3</timezone>\n\t\t</developer>\n\t\t<developer>\n\t\t\t<name>Dan Murphy</name>\n\t\t\t<id>dgem</id>\n\t\t\t<email>dan.murphy@floggy.org</email>\n\t\t\t<organization>Floggy Open Source Group</organization>\n\t\t\t<roles>\n\t\t\t\t<role>Eclipse Developer</role>\n\t\t\t</roles>\n\t\t\t<timezone>+0</timezone>\n\t\t</developer>\n\t\t<developer>\n\t\t\t<name>Frederico Jos\xe9 de Souza Filho</name>\n\t\t\t<id>fredjsf</id>\n\t\t\t<email>frederico.souza@floggy.org</email>\n\t\t\t<organization>Floggy Open Source Group</organization>\n\t\t\t<roles>\n\t\t\t\t<role>Java Developer</role>\n\t\t\t</roles>\n\t\t\t<timezone>-3</timezone>\n\t\t</developer>\n\t</developers>\n\t<build>\n\t\t<plugins>\n\t\t\t<plugin>\n\t\t\t\t<groupId>com.mycila.maven-license-plugin</groupId>\n\t\t\t\t<artifactId>maven-license-plugin</artifactId>\n\t\t\t\t<version>1.9.0</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t\t<dependencies>\n\t\t\t\t\t<dependency>\n\t\t\t\t\t\t<groupId>net.sourceforge.floggy</groupId>\n\t\t\t\t\t\t<artifactId>build-tools</artifactId>\n\t\t\t\t\t\t<version>1.4.0-SNAPSHOT</version>\n\t\t\t\t\t</dependency>\n\t\t\t\t</dependencies>\n\t\t\t\t<configuration>\n\t\t\t\t\t<header>license-header-template.txt</header>\n\t\t\t\t\t<includes>\n\t\t\t\t\t\t<include>src/**</include>\n\t\t\t\t\t</includes>\n\t\t\t\t\t<excludes>\n\t\t\t\t\t\t<exclude>target/**</exclude>\n\t\t\t\t\t\t<exclude>**/license-header-template.txt</exclude>\n\t\t\t\t\t</excludes>\n\t\t\t\t\t<properties>\n\t\t\t\t\t\t<year>2011</year>\n\t\t\t\t\t\t<organizationName>${project.organization.name}</organizationName>\n\t\t\t\t\t</properties>\n\t\t\t\t</configuration>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-checkstyle-plugin</artifactId>\n\t\t\t\t<version>2.6</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t\t<dependencies>\n\t\t\t\t\t<dependency>\n\t\t\t\t\t\t<groupId>net.sourceforge.floggy</g
roupId>\n\t\t\t\t\t\t<artifactId>build-tools</artifactId>\n\t\t\t\t\t\t<version>1.4.0-SNAPSHOT</version>\n\t\t\t\t\t</dependency>\n\t\t\t\t</dependencies>\n\t\t\t\t<configuration>\n\t\t\t\t\t<configLocation>checkstyle-checks-configuration.xml</configLocation>\n\t\t\t\t</configuration>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-compiler-plugin</artifactId>\n\t\t\t\t<version>2.3.2</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t\t<configuration>\n\t\t\t\t\t<source>1.3</source>\n\t\t\t\t\t<target>1.5</target>\n\t\t\t\t</configuration>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-deploy-plugin</artifactId>\n\t\t\t\t<version>2.5</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-enforcer-plugin</artifactId>\n\t\t\t\t<version>1.0</version>\n\t\t\t\t<executions>\n\t\t\t\t\t<execution>\n\t\t\t\t\t\t<id>enforce-versions</id>\n\t\t\t\t\t\t<goals>\n\t\t\t\t\t\t\t<goal>enforce</goal>\n\t\t\t\t\t\t</goals>\n\t\t\t\t\t\t<configuration>\n\t\t\t\t\t\t\t<rules>\n\t\t\t\t\t\t\t\t<requireMavenVersion>\n\t\t\t\t\t\t\t\t\t<version>2.2.0</version>\n\t\t\t\t\t\t\t\t</requireMavenVersion>\n\t\t\t\t\t\t\t\t<requireJavaVersion>\n\t\t\t\t\t\t\t\t\t<version>1.4</version>\n\t\t\t\t\t\t\t\t</requireJavaVersion>\n\t\t\t\t\t\t\t</rules>\n\t\t\t\t\t\t</configuration>\n\t\t\t\t\t</execution>\n\t\t\t\t</executions>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-install-plugin</artifactId>\n\t\t\t\t<version>2.3.1</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-gpg-plugin</artifactId>\n\t\t\t\t<version>1.1</version>\n\t\t\t\t<executions>\n\t\t\t\t\t<execution>\n\t\t\t\t\t\t<id>si
gn-artifacts</id>\n\t\t\t\t\t\t<phase>verify</phase>\n\t\t\t\t\t\t<goals>\n\t\t\t\t\t\t\t<goal>sign</goal>\n\t\t\t\t\t\t</goals>\n\t\t\t\t\t</execution>\n\t\t\t\t</executions>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-release-plugin</artifactId>\n\t\t\t\t<version>2.1</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t\t<configuration>\n\t\t\t\t\t<tagBase>https://floggy.svn.sourceforge.net/svnroot/floggy/tags</tagBase>\n\t\t\t\t</configuration>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-resources-plugin</artifactId>\n\t\t\t\t<version>2.4.3</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-site-plugin</artifactId>\n\t\t\t\t<version>3.0-beta-3</version>\n\t\t\t\t<configuration>\n\t\t\t\t\t<reportPlugins>\n\t\t\t\t\t\t<plugin>\n\t\t\t\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t\t\t\t<artifactId>maven-checkstyle-plugin</artifactId>\n\t\t\t\t\t\t\t<version>2.6</version>\n\t\t\t\t\t\t\t<configuration>\n\t\t\t\t\t\t\t\t<configLocation>checkstyle-checks-configuration.xml</configLocation>\n\t\t\t\t\t\t\t</configuration>\n\t\t\t\t\t\t</plugin>\n\t\t\t\t\t\t<plugin>\n\t\t\t\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t\t\t\t<artifactId>maven-javadoc-plugin</artifactId>\n\t\t\t\t\t\t\t<version>2.7</version>\n\t\t\t\t\t\t\t<configuration>\n\t\t\t\t\t\t\t\t<minmemory>128m</minmemory>\n\t\t\t\t\t\t\t\t<maxmemory>512</maxmemory>\n\t\t\t\t\t\t\t\t<stylesheet>maven</stylesheet>\n\t\t\t\t\t\t\t\t<excludePackageNames>*.internal</excludePackageNames>\n\t\t\t\t\t\t\t</configuration>\n\t\t\t\t\t\t</plugin>\n\t\t\t\t\t\t<plugin>\n\t\t\t\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t\t\t\t<artifactId>maven-jxr-plugin</artifactId>\n\t\t\t\t\t\t\t<version>2.2</version>\n\t\t\t\t\t\t\t<configuration>
\n\t\t\t\t\t\t\t\t<linkJavadoc>true</linkJavadoc>\n\t\t\t\t\t\t\t\t<javadocDir>/apidocs/</javadocDir>\n\t\t\t\t\t\t\t</configuration>\n\t\t\t\t\t\t</plugin>\n\t\t\t\t\t\t<plugin>\n\t\t\t\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t\t\t\t<artifactId>maven-project-info-reports-plugin</artifactId>\n\t\t\t\t\t\t\t<version>2.3.1</version>\n\t\t\t\t\t\t</plugin>\n\t\t\t\t\t\t<plugin>\n\t\t\t\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t\t\t\t<artifactId>maven-surefire-report-plugin</artifactId>\n\t\t\t\t\t\t\t<version>2.7.2</version>\n\t\t\t\t\t\t</plugin>\n\t\t\t\t\t</reportPlugins>\n\t\t\t\t</configuration>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-surefire-plugin</artifactId>\n\t\t\t\t<version>2.7.2</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t</plugin>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.codehaus.mojo</groupId>\n\t\t\t\t<artifactId>jalopy-maven-plugin</artifactId>\n\t\t\t\t<version>1.0-alpha-1</version>\n\t\t\t\t<inherited>true</inherited>\n\t\t\t\t<dependencies>\n\t\t\t\t\t<dependency>\n\t\t\t\t\t\t<groupId>net.sourceforge.floggy</groupId>\n\t\t\t\t\t\t<artifactId>build-tools</artifactId>\n\t\t\t\t\t\t<version>1.4.0-SNAPSHOT</version>\n\t\t\t\t\t</dependency>\n\t\t\t\t</dependencies>\n\t\t\t\t<configuration>\n\t\t\t\t\t<convention>jalopy-code-convetion.xml</convention>\n\t\t\t\t\t<srcExcludesPattern>**\\Freezed.java</srcExcludesPattern>\n\t\t\t\t</configuration>\n\t\t\t</plugin>\n\t\t</plugins>\n\t</build>\n\t<modules>\n\t\t<module>build-tools</module>\n\t\t<module>floggy-persistence-framework</module>\n\t\t<module>floggy-persistence-framework-impl</module>\n\t\t<module>floggy-persistence-weaver</module>\n\t\t<module>maven-floggy-plugin</module>\n\t\t<module>floggy-persistence-test</module>\n\t\t<module>floggy-persistence-demo</module>\n\t\t<module>barbecue-calculator</module>\n\t\t<module>eclipse-floggy-plugin</module>\n\t\t<module>eclipse-floggy-feature<
/module>\n\t\t<module>floggy-site-skin</module>\n\t\t<module>floggy-site</module>\n\t</modules>\n\t<properties>\n\t\t<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>\n\t\t<floggy.storepass>f10ggy</floggy.storepass>\n\t</properties>\n</project>\n'
>>> xmltodict.parse(response.content.decode())
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe3 in position 4225: invalid continuation byte
>>> xmltodict.parse(response.content)
OrderedDict([('project', OrderedDict([('@xmlns', 'http://maven.apache.org/POM/4.0.0'), ('@xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'), ('@xsi:schemaLocation', 'http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd'), ('modelVersion', '4.0.0'), ('groupId', 'net.sourceforge.floggy'), ('artifactId', 'floggy'), ('version', '1.4.0'), ('name', "Floggy Framework's for JME"), ('url', 'http://floggy.sourceforge.net'), ('description', 'A collection of frameworks for JME'), ('inceptionYear', '2006'), ('packaging', 'pom'), ('organization', OrderedDict([('name', 'Floggy Open Source Group'), ('url', 'http://floggy.org/')])), ('licenses', OrderedDict([('license', OrderedDict([('distribution', 'manual'), ('name', 'Apache License 2.0'), ('url', 'http://www.apache.org/licenses/LICENSE-2.0.txt')]))])), ('scm', OrderedDict([('connection', 'scm:svn:https://floggy.svn.sourceforge.net/svnroot/floggy/tags/floggy-1.4.0'), ('developerConnection', 'scm:svn:https://floggy.svn.sourceforge.net/svnroot/floggy/tags/floggy-1.4.0'), ('url', 'http://floggy.svn.sourceforge.net/viewvc/floggy/tags/floggy-1.4.0')])), ('repositories', OrderedDict([('repository', OrderedDict([('id', 'sonatype-snapshot'), ('url', 'http://oss.sonatype.org/content/repositories/sourceforge-snapshots'), ('snapshots', OrderedDict([('enabled', 'true')])), ('releases', OrderedDict([('enabled', 'false')]))]))])), ('pluginRepositories', OrderedDict([('pluginRepository', OrderedDict([('id', 'sonatype-plugin-snapshot'), ('url', 'http://oss.sonatype.org/content/repositories/sourceforge-snapshots'), ('snapshots', OrderedDict([('enabled', 'true')])), ('releases', OrderedDict([('enabled', 'false')]))]))])), ('issueManagement', OrderedDict([('system', 'sourceforge'), ('url', 'http://sourceforge.net/tracker/?group_id=139426')])), ('ciManagement', OrderedDict([('system', 'parabuild'), ('url', 'http://parabuild.viewtier.com:8080/parabuild/index.htm?cid=29a4&view=detailed&buildid=12739&displaygroupid=-1')])), 
('distributionManagement', OrderedDict([('site', OrderedDict([('id', 'sourceforge'), ('name', 'SourceForge.net'), ('url', 'scp://shell.sourceforge.net:/home/groups/f/fl/floggy/htdocs/modules/${project.artifactId}/${project.version}')])), ('repository', OrderedDict([('id', 'sonatype'), ('name', 'Sonatype.org'), ('url', 'http://oss.sonatype.org/service/local/staging/deploy/maven2')])), ('snapshotRepository', OrderedDict([('id', 'sonatype'), ('name', 'Sonatype.org'), ('url', 'http://oss.sonatype.org/content/repositories/sourceforge-snapshots')]))])), ('mailingLists', OrderedDict([('mailingList', [OrderedDict([('name', "Developer's mailing list"), ('archive', 'http://sourceforge.net/mailarchive/forum.php?forum=floggy-developer'), ('post', 'floggy-developer@lists.sourceforge.net'), ('subscribe', 'https://lists.sourceforge.net/lists/listinfo/floggy-developer'), ('unsubscribe', 'https://lists.sourceforge.net/lists/listinfo/floggy-developer')]), OrderedDict([('name', "User's mailing list"), ('archive', 'http://sourceforge.net/mailarchive/forum.php?forum=floggy-user'), ('post', 'floggy-user@lists.sourceforge.net'), ('subscribe', 'https://lists.sourceforge.net/lists/listinfo/floggy-user'), ('unsubscribe', 'https://lists.sourceforge.net/lists/listinfo/floggy-user')])])])), ('developers', OrderedDict([('developer', [OrderedDict([('name', 'Thiago Leão Moreira'), ('id', 'thiagolm'), ('email', 'thiago.moreira@floggy.org'), ('organization', 'Floggy Open Source Group'), ('roles', OrderedDict([('role', ['Java Developer', 'Release Manager'])])), ('timezone', '-3')]), OrderedDict([('name', 'Thiago Rossato'), ('id', 'thiagorossato'), ('email', 'thiago.rossato@floggy.org'), ('organization', 'Floggy Open Source Group'), ('roles', OrderedDict([('role', 'Java Developer')])), ('timezone', '-3')]), OrderedDict([('name', 'Priscila Tavares Lugon'), ('id', 'priscilalugon'), ('email', 'priscila.lugon@floggy.org'), ('organization', 'Floggy Open Source Group'), ('roles', OrderedDict([('role', 
'Java Developer')])), ('timezone', '-3')]), OrderedDict([('name', 'Dan Murphy'), ('id', 'dgem'), ('email', 'dan.murphy@floggy.org'), ('organization', 'Floggy Open Source Group'), ('roles', OrderedDict([('role', 'Eclipse Developer')])), ('timezone', '+0')]), OrderedDict([('name', 'Frederico José de Souza Filho'), ('id', 'fredjsf'), ('email', 'frederico.souza@floggy.org'), ('organization', 'Floggy Open Source Group'), ('roles', OrderedDict([('role', 'Java Developer')])), ('timezone', '-3')])])])), ('build', OrderedDict([('plugins', OrderedDict([('plugin', [OrderedDict([('groupId', 'com.mycila.maven-license-plugin'), ('artifactId', 'maven-license-plugin'), ('version', '1.9.0'), ('inherited', 'true'), ('dependencies', OrderedDict([('dependency', OrderedDict([('groupId', 'net.sourceforge.floggy'), ('artifactId', 'build-tools'), ('version', '1.4.0-SNAPSHOT')]))])), ('configuration', OrderedDict([('header', 'license-header-template.txt'), ('includes', OrderedDict([('include', 'src/**')])), ('excludes', OrderedDict([('exclude', ['target/**', '**/license-header-template.txt'])])), ('properties', OrderedDict([('year', '2011'), ('organizationName', '${project.organization.name}')]))]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-checkstyle-plugin'), ('version', '2.6'), ('inherited', 'true'), ('dependencies', OrderedDict([('dependency', OrderedDict([('groupId', 'net.sourceforge.floggy'), ('artifactId', 'build-tools'), ('version', '1.4.0-SNAPSHOT')]))])), ('configuration', OrderedDict([('configLocation', 'checkstyle-checks-configuration.xml')]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-compiler-plugin'), ('version', '2.3.2'), ('inherited', 'true'), ('configuration', OrderedDict([('source', '1.3'), ('target', '1.5')]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-deploy-plugin'), ('version', '2.5'), ('inherited', 'true')]), OrderedDict([('groupId', 
'org.apache.maven.plugins'), ('artifactId', 'maven-enforcer-plugin'), ('version', '1.0'), ('executions', OrderedDict([('execution', OrderedDict([('id', 'enforce-versions'), ('goals', OrderedDict([('goal', 'enforce')])), ('configuration', OrderedDict([('rules', OrderedDict([('requireMavenVersion', OrderedDict([('version', '2.2.0')])), ('requireJavaVersion', OrderedDict([('version', '1.4')]))]))]))]))]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-install-plugin'), ('version', '2.3.1'), ('inherited', 'true')]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-gpg-plugin'), ('version', '1.1'), ('executions', OrderedDict([('execution', OrderedDict([('id', 'sign-artifacts'), ('phase', 'verify'), ('goals', OrderedDict([('goal', 'sign')]))]))]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-release-plugin'), ('version', '2.1'), ('inherited', 'true'), ('configuration', OrderedDict([('tagBase', 'https://floggy.svn.sourceforge.net/svnroot/floggy/tags')]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-resources-plugin'), ('version', '2.4.3'), ('inherited', 'true')]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-site-plugin'), ('version', '3.0-beta-3'), ('configuration', OrderedDict([('reportPlugins', OrderedDict([('plugin', [OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-checkstyle-plugin'), ('version', '2.6'), ('configuration', OrderedDict([('configLocation', 'checkstyle-checks-configuration.xml')]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-javadoc-plugin'), ('version', '2.7'), ('configuration', OrderedDict([('minmemory', '128m'), ('maxmemory', '512'), ('stylesheet', 'maven'), ('excludePackageNames', '*.internal')]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-jxr-plugin'), ('version', '2.2'), ('configuration', 
OrderedDict([('linkJavadoc', 'true'), ('javadocDir', '/apidocs/')]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-project-info-reports-plugin'), ('version', '2.3.1')]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-surefire-report-plugin'), ('version', '2.7.2')])])]))]))]), OrderedDict([('groupId', 'org.apache.maven.plugins'), ('artifactId', 'maven-surefire-plugin'), ('version', '2.7.2'), ('inherited', 'true')]), OrderedDict([('groupId', 'org.codehaus.mojo'), ('artifactId', 'jalopy-maven-plugin'), ('version', '1.0-alpha-1'), ('inherited', 'true'), ('dependencies', OrderedDict([('dependency', OrderedDict([('groupId', 'net.sourceforge.floggy'), ('artifactId', 'build-tools'), ('version', '1.4.0-SNAPSHOT')]))])), ('configuration', OrderedDict([('convention', 'jalopy-code-convetion.xml'), ('srcExcludesPattern', '**\\Freezed.java')]))])])]))])), ('modules', OrderedDict([('module', ['build-tools', 'floggy-persistence-framework', 'floggy-persistence-framework-impl', 'floggy-persistence-weaver', 'maven-floggy-plugin', 'floggy-persistence-test', 'floggy-persistence-demo', 'barbecue-calculator', 'eclipse-floggy-plugin', 'eclipse-floggy-feature', 'floggy-site-skin', 'floggy-site'])])), ('properties', OrderedDict([('project.build.sourceEncoding', 'UTF-8'), ('floggy.storepass', 'f10ggy')]))]))])

If decoding fails because the encoding declared in the pom file does not match its actual content, xml.parsers.expat.ExpatError is raised; the lister catches it, ignores the pom file, and continues listing.

>>> response = requests.get("https://repo.clojars.org/joodo/lein-joodo/0.9.0/lein-joodo-0.9.0.pom")
>>> response.content
b'<?xml version="1.0" encoding="UTF-8"?>\n<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"\n    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n  <modelVersion>4.0.0</modelVersion>\n  <groupId>joodo</groupId>\n  <artifactId>lein-joodo</artifactId>\n  <version>0.9.0</version>\n  <name>lein-joodo</name>\n  <description>Leiningen Plugin for Joodo, a Clojure framework for web apps.</description>\n  <licenses>\n    <license>\n      <name>The MIT License</name>\n      <url>file://LICENSE</url>\n      <distribution>repo</distribution>\n      <comments>Copyright \xa9 2011-2012 Micah Martin All Rights Reserved.</comments>\n    </license>\n  </licenses>\n  <build>\n    <sourceDirectory>src</sourceDirectory>\n    <testSourceDirectory>spec</testSourceDirectory>\n    <resources>\n      <resource>\n        <directory>resources</directory>\n      </resource>\n    </resources>\n    <testResources>\n      <testResource>\n        <directory>test-resources</directory>\n      </testResource>\n    </testResources>\n  </build>\n  <repositories>\n    <repository>\n      <id>central</id>\n      <url>http://repo1.maven.org/maven2</url>\n    </repository>\n    <repository>\n      <id>clojars</id>\n      <url>http://clojars.org/repo/</url>\n    </repository>\n  </repositories>\n  <dependencies>\n    <dependency>\n      <groupId>speclj</groupId>\n      <artifactId>speclj</artifactId>\n      <version>2.2.0</version>\n      <scope>test</scope>\n    </dependency>\n    <dependency>\n      <groupId>filecabinet</groupId>\n      <artifactId>filecabinet</artifactId>\n      <version>1.0.4</version>\n      <scope>test</scope>\n    </dependency>\n    <dependency>\n      <groupId>org.clojure</groupId>\n      <artifactId>clojure</artifactId>\n      <version>1.4.0</version>\n    </dependency>\n    <dependency>\n      <groupId>filecabinet</groupId>\n      <artifactId>filecabinet</artifactId>\n      
<version>1.0.4</version>\n    </dependency>\n    <dependency>\n      <groupId>mmargs</groupId>\n      <artifactId>mmargs</artifactId>\n      <version>1.2.0</version>\n    </dependency>\n  </dependencies>\n</project>\n\n<!-- This file was autogenerated by Leiningen.\n  Please do not edit it directly; instead edit project.clj and regenerate it.\n  It should not be considered canonical data. For more information see\n  https://github.com/technomancy/leiningen -->\n'
>>> xmltodict.parse(response.content.decode())
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa9 in position 654: invalid start byte
>>> xmltodict.parse(response.content)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/anlambert/.virtualenvs/swh/lib/python3.9/site-packages/xmltodict.py", line 327, in parse
    parser.Parse(xml_input, True)
xml.parsers.expat.ExpatError: not well-formed (invalid token): line 15, column 26

Related to T3874

Fixes SWH-LISTER-5J


Migrated from D7721 (view on Phabricator)

Merge request reports