From 7f85bd7cea2cc493e9b29307131bab2476077c9c Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Thu, 9 Jun 2022 14:49:05 +0200 Subject: [PATCH] tarball: Use standard Python module zipfile to extract jar archive There are many cases where using unzip to extract a jar archive fails while using the zipfile module succeeds. So prefer to use the zipfile module to uncompress jar archives. Related to T4318 --- swh/core/tarball.py | 26 ++++++++++++++++++++++++- swh/core/tests/data/archives/hello.jar | Bin 0 -> 550 bytes 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 swh/core/tests/data/archives/hello.jar diff --git a/swh/core/tarball.py b/swh/core/tarball.py index d7b01e90..e2d06b99 100644 --- a/swh/core/tarball.py +++ b/swh/core/tarball.py @@ -64,6 +64,29 @@ def _unpack_zip(zippath: str, extract_dir: str) -> str: ) +def _unpack_jar(jarpath: str, extract_dir: str) -> str: + """Unpack jar files using standard Python module zipfile. + + This expects the `extract_dir` to exist. + + Raises: + shutil.ReadError in case of issue uncompressing the archive (jarpath + does not exist, extract_dir does not exist, etc...) + + Returns: + full path to the uncompressed directory. + + """ + try: + with zipfile.ZipFile(jarpath) as jar: + jar.extractall(path=extract_dir) + return extract_dir + except Exception as e: + raise shutil.ReadError( + f"Unable to uncompress {jarpath} to {extract_dir}. 
Reason: {e}" + ) + + def register_new_archive_formats(): """Register new archive formats to uncompress""" registered_formats = [f[0] for f in shutil.get_unpack_formats()] @@ -80,6 +103,7 @@ _mime_to_archive_format = { "application/gzip": "gztar", "application/x-lzip": "tar.lz", "application/zip": "zip", + "application/java-archive": "jar", } @@ -192,7 +216,7 @@ def compress(tarpath, nature, dirpath_or_files): ADDITIONAL_ARCHIVE_FORMATS = [ # name, extensions, function ("tar.Z|x", [".tar.Z", ".tar.x"], _unpack_tar), - ("jar", [".jar"], _unpack_zip), + ("jar", [".jar"], _unpack_jar), ("tbz2", [".tbz", "tbz2"], _unpack_tar), # FIXME: make this optional depending on the runtime lzip package install ("tar.lz", [".tar.lz"], _unpack_tar), diff --git a/swh/core/tests/data/archives/hello.jar b/swh/core/tests/data/archives/hello.jar new file mode 100644 index 0000000000000000000000000000000000000000..7b7a00fc8fb49ecc64b0678234dd8768c369affc GIT binary patch literal 550 zcmWIWW@Zs#;Nak3@Gd<W!hi%g8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1g<Oz^PdT zr~<Cp*U`_@%{4eg&)4m<@0rs+-nx1hdA)VD&Yd~GImqCO@q?#DdS1Rdp1v1LSFvz0 zxPIaB6*Y+onrtNe*`#u3#ZIujd^_@4%z&nv12M>6E(QjmTQ*nL_(L57rr3aFMruw@ zzCOZjJJHne!PTT?=A`PCRFs_dID7JBxW;+?Q=Si*z_#gHbF2rc;{oCTZ$>5&2Gl@; zg#aj!PyyW6pddrnitGtc5FvmqkO|j{6ks3|xEOGI7Gc6xAQR?wgo8nzNA?WJQwVSq a$b{&D1x0{2D<}+s-eO^R3#649K|BCgoMET{ literal 0 HcmV?d00001 -- GitLab