Skip to content

Commit

Permalink
Extract original filename for GZIP (.gz) archives.
Browse files Browse the repository at this point in the history
  • Loading branch information
wummel committed Nov 13, 2023
1 parent 8b3cd17 commit b6347f5
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 13 deletions.
4 changes: 3 additions & 1 deletion doc/changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
module in Python >= 3.12 to detect unsafe archive entries.
* Added new command "patool version" to print version information.
* Do not use maximum compression anymore, the default settings should
be good enough for a standard use case.
be good enough for our standard use case.
* Restore original file name for GZIP (.gz) archives.
Closes: GH bug #67


1.15.0 (released 28.10.2023)
Expand Down
36 changes: 33 additions & 3 deletions patoolib/programs/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,42 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Archive commands for the gzip program."""
from . import extract_singlefile_standard, test_singlefile_standard
from .. import util
from . import test_singlefile_standard
from .. import util, fileutil, log
import os

extract_gzip = extract_compress = extract_singlefile_standard
test_gzip = test_compress = test_singlefile_standard

def extract_gzip(archive, compression, cmd, verbosity, interactive, outdir):
    """Build the shell command that extracts a GZIP archive.

    The archive is decompressed to stdout (``-c -d``) and redirected into
    the output file chosen by get_original_filename(), so the command has
    to be run through a shell.

    Returns a tuple of the command list and the keyword arguments
    (``{'shell': True}``) for running it.
    """
    quoted_cmd = util.shell_quote(cmd)
    verbose_flags = ['-v'] if verbosity > 1 else []
    target = get_original_filename(cmd, outdir, archive)
    # '--' ends option parsing so the archive name is never read as an option
    decompress_args = [
        '-c', '-d', '--', util.shell_quote(archive),
        '>', util.shell_quote(target),
    ]
    return ([quoted_cmd, *verbose_flags, *decompress_args], {'shell': True})


# The same command line is used for compress (.Z) archives.
extract_compress = extract_gzip


def get_original_filename(cmd, outdir, archive):
    """Return the path the extracted content of a GZIP archive is written to.

    The default is the single-file output name derived from the archive
    name. If ``cmd --name --list archive`` reports a different stored
    file name, that name (reduced to its basename) is used inside outdir
    instead, unless it equals the archive's own name.

    The default is kept whenever the listing cannot be obtained or parsed:
    the command cannot be run (OSError, which is logged), the output has
    no data line, the data line has no name column, or the name is empty.

    @param cmd: the gzip-compatible program to run
    @param outdir: directory the extracted file is placed in
    @param archive: path of the archive
    @return: path of the output file
    """
    outfile = fileutil.get_single_outfile(outdir, archive)
    cmdlist = [cmd, "--name", "--list", archive]
    # NOTE(review): only OSError is handled here, as before; confirm whether
    # util.backtick can also raise on a non-zero exit status of the program.
    try:
        output = util.backtick(cmdlist).strip()
    except OSError as err:
        log.log_error(f"could not run {cmd}: {err}")
        return outfile
    lines = output.splitlines()
    if len(lines) < 2:
        # no data line after the header: nothing to restore
        return outfile
    # The non-verbose listing has four columns (compressed, uncompressed,
    # ratio, uncompressed_name). Limit the split so that a stored name
    # containing whitespace is kept in one piece.
    values = lines[1].split(None, 3)
    if len(values) < 4:
        return outfile
    # use only the basename so a stored path cannot leave outdir
    basefilename = os.path.basename(values[3])
    if not basefilename:
        return outfile
    baseoutfile = os.path.basename(outfile)
    if os.path.normcase(baseoutfile) != os.path.normcase(basefilename):
        basearchive = os.path.basename(archive)
        # avoid overwriting the original archive with the same name
        if os.path.normcase(basefilename) != os.path.normcase(basearchive):
            outfile = os.path.join(outdir, basefilename)
    return outfile


def create_gzip(archive, compression, cmd, verbosity, interactive, filenames):
"""Create a GZIP archive."""
Expand Down
17 changes: 11 additions & 6 deletions tests/archives/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2015 Bastian Kleineidam
# Copyright (C) 2010-2023 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -72,9 +72,9 @@ def archive_extract(self, filename, check=Content.Recursive):
relarchive = os.path.join("..", archive[len(basedir)+1:])
self._archive_extract(relarchive, check, verbosity=1)

def _archive_extract (self, archive, check, verbosity=0):
def _archive_extract(self, archive, check, verbosity=0):
# create a temporary directory for extraction
tmpdir = patoolib.fileutil.tmpdir(dir=basedir)
tmpdir = patoolib.fileutil.tmpdir(dir=basedir, prefix="test_")
try:
olddir = patoolib.fileutil.chdir(tmpdir)
try:
Expand All @@ -87,7 +87,7 @@ def _archive_extract (self, archive, check, verbosity=0):
finally:
patoolib.fileutil.rmtree(tmpdir)

def check_extracted_archive (self, archive, output, check):
def check_extracted_archive(self, archive, output, check):
if check == Content.Recursive:
# outdir is the 't' directory of the archive
self.assertEqual(output, 't')
Expand All @@ -96,15 +96,20 @@ def check_extracted_archive (self, archive, output, check):
self.check_textfile(txtfile, 't.txt')
elif check == Content.Singlefile:
# a non-existing directory to ensure files do not exist in it
ned = get_nonexisting_directory(os.getcwd())
expected_output = os.path.basename(patoolib.fileutil.get_single_outfile(ned, archive))
expected_output = self.get_expected_singlefile_output(archive)
self.check_textfile(output, expected_output)
elif check == Content.Multifile:
txtfile = os.path.join(output, 't.txt')
self.check_textfile(txtfile, 't.txt')
txtfile2 = os.path.join(output, 't2.txt')
self.check_textfile(txtfile2, 't2.txt')

def get_expected_singlefile_output(self, archive):
    """Return the file name a singlefile archive is expected to extract to.

    Archives whose name ends with ".foo" are expected to extract to their
    own basename with the extension stripped; all others to "t.txt".
    """
    if not archive.endswith(".foo"):
        return "t.txt"
    archive_name = os.path.basename(archive)
    return patoolib.fileutil.stripext(archive_name)

def check_directory (self, dirname, expectedname):
"""Check that directory exists."""
self.assertTrue(os.path.isdir(dirname), dirname)
Expand Down
9 changes: 7 additions & 2 deletions tests/archives/test_gzip.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2015 Bastian Kleineidam
# Copyright (C) 2010-2023 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -28,6 +28,11 @@ def test_gzip (self):
@needs_program('file')
@needs_program(program)
def test_gzip_file(self):
self.archive_commands('t.txt.gz.foo', skip_create=True, check=None)
self.archive_commands('t.txt.gz.foo', skip_create=True, check=Content.Singlefile)
self.archive_extract('t.txt.Z.foo', check=Content.Singlefile)

def get_expected_singlefile_output(self, archive):
    """Return the file name gzip is expected to extract the archive to.

    Archives named "*.Z.foo" are expected to yield "t.txt.Z"; every other
    archive yields "t.txt", the original file name gzip restores for .gz
    files.
    """
    return "t.txt.Z" if archive.endswith(".Z.foo") else "t.txt"
6 changes: 5 additions & 1 deletion tests/archives/test_pigz.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2010-2015 Bastian Kleineidam
# Copyright (C) 2010-2023 Bastian Kleineidam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -29,3 +29,7 @@ def test_pigz (self):
def test_pigz_file (self):
self.archive_commands('t.txt.gz.foo', check=Content.Singlefile,
skip_create=True, skip_test=True)

def get_expected_singlefile_output(self, archive):
    """Return the file name pigz is expected to extract the archive to.

    pigz restores the original file name for .gz files, so the result is
    the same for every archive name.
    """
    expected_name = "t.txt"
    return expected_name

0 comments on commit b6347f5

Please sign in to comment.