From b6347f5f95307aec37ec85b6a52ae4476cb8c0bf Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Mon, 13 Nov 2023 23:55:11 +0100 Subject: [PATCH] Extract original filename for GZIP (.gz) archives. --- doc/changelog.txt | 4 +++- patoolib/programs/gzip.py | 36 +++++++++++++++++++++++++++++++++--- tests/archives/__init__.py | 17 +++++++++++------ tests/archives/test_gzip.py | 9 +++++++-- tests/archives/test_pigz.py | 6 +++++- 5 files changed, 59 insertions(+), 13 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 7a0c0022..07027cbf 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -19,7 +19,9 @@ module in Python >= 3.12 to detect unsafe archive entries. * Added new command "patool version" to print version information. * Do not use maximum compression anymore, the default settings should - be good enough for a standard use case. + be good enough for our standard use case. +* Restore original file name for GZIP (.gz) archives. + Closes: GH bug #67 1.15.0 (released 28.10.2023) diff --git a/patoolib/programs/gzip.py b/patoolib/programs/gzip.py index 03529ab6..d5c78ae3 100644 --- a/patoolib/programs/gzip.py +++ b/patoolib/programs/gzip.py @@ -14,12 +14,42 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Archive commands for the gzip program.""" -from . import extract_singlefile_standard, test_singlefile_standard -from .. import util +from . import test_singlefile_standard +from .. 
def extract_gzip(archive, compression, cmd, verbosity, interactive, outdir):
    """Extract a GZIP archive, restoring the stored original file name.

    The data is decompressed to stdout (``-c -d``) and redirected by the
    shell into the file chosen by get_original_filename().

    @param archive: path of the archive to extract
    @param compression: unused here
    @param cmd: the gzip-compatible program to run
    @param verbosity: values greater than 1 add ``-v`` to the command
    @param interactive: unused here
    @param outdir: directory the output file is written to
    @return: tuple (command list, keyword arguments for running it);
        the command uses shell redirection and therefore needs shell=True
    """
    cmdlist = [util.shell_quote(cmd)]
    if verbosity > 1:
        cmdlist.append('-v')
    outfile = get_original_filename(cmd, outdir, archive)
    cmdlist.extend(['-c', '-d', '--', util.shell_quote(archive), '>',
        util.shell_quote(outfile)])
    return (cmdlist, {'shell': True})

extract_compress = extract_gzip


def parse_list_output(output):
    """Return the file name found in the output of ``cmd --name --list``.

    The first output line is treated as a column header. The second line
    is expected to hold three whitespace separated columns followed by
    the file name; everything after the third column is the name, so
    names containing whitespace are kept intact.

    @param output: the text printed by the list command
    @return: the listed file name, or None if the output does not have
        the expected shape (e.g. the command printed nothing)
    """
    lines = output.strip().splitlines()
    if len(lines) < 2:
        return None
    # split off the three leading columns only, the rest is the name
    fields = lines[1].split(None, 3)
    if len(fields) < 4:
        return None
    return fields[3]


def get_original_filename(cmd, outdir, archive):
    """Return the path of the file the archive should be extracted to.

    The default is the name computed by fileutil.get_single_outfile().
    If the list output of the program names a different file, that name
    is used inside outdir instead. The default is kept when the listed
    name cannot be determined, when it equals the archive name, or when
    a file with that name already exists in outdir.

    @param cmd: the gzip-compatible program to run
    @param outdir: directory the output file is written to
    @param archive: path of the archive
    @return: path of the output file
    """
    outfile = fileutil.get_single_outfile(outdir, archive)
    try:
        output = util.backtick([cmd, "--name", "--list", archive])
    except OSError as err:
        log.log_error(f"could not run {cmd}: {err}")
        return outfile
    listed = parse_list_output(output)
    if not listed:
        # unexpected or empty list output: keep the default name
        return outfile
    basefilename = os.path.basename(listed)
    if not basefilename:
        # a name ending in a path separator has no usable file part
        return outfile
    normalized = os.path.normcase(basefilename)
    if normalized == os.path.normcase(os.path.basename(outfile)):
        return outfile
    if normalized == os.path.normcase(os.path.basename(archive)):
        # avoid overwriting the original archive with the same name
        return outfile
    candidate = os.path.join(outdir, basefilename)
    if os.path.exists(candidate):
        # the shell redirection used for extraction would silently
        # truncate an existing file
        return outfile
    return candidate
def get_expected_singlefile_output(self, archive):
    """Return the file name a singlefile archive is expected to produce.

    Archives ending in ".foo" are expected to yield their own base name
    with the extension stripped; every other archive yields "t.txt".
    """
    if not archive.endswith(".foo"):
        return "t.txt"
    archive_name = os.path.basename(archive)
    return patoolib.fileutil.stripext(archive_name)
def get_expected_singlefile_output(self, archive):
    """Return the file name expected after extracting with gzip.

    Archives ending in ".Z.foo" are expected to yield "t.txt.Z", all
    other archives the restored original name "t.txt".
    """
    return "t.txt.Z" if archive.endswith(".Z.foo") else "t.txt"