From 9a0a59887922d76f04a736035bed79ba511796a0 Mon Sep 17 00:00:00 2001
From: Alexander Dietrich
Date: Mon, 27 Feb 2023 18:48:29 +0100
Subject: [PATCH] Add pbzip2 support

---
Reviewer notes (not part of the commit; text between the "---" line and the
first "diff --git" line is ignored when the patch is applied):

  * This copy was rebuilt from a whitespace-flattened paste. Diff markers,
    hunk headers and line text are unchanged. Line breaks, blank context
    lines and leading indentation were inferred from the hunk line counts
    and from Python syntax.
  * The nesting depth of context lines inside extract_entry(),
    _load_metadata() and version() cannot be recovered from the paste; they
    are set to the shallowest level that is syntactically consistent. If the
    patch does not apply cleanly, try `git apply --ignore-whitespace` or
    regenerate it from commit 9a0a598.
  * BZip2.decompress() always uses the stdlib bz2 module; only
    BZip2.compress() shells out to pbzip2.
  * test_ensure_pbzip2 asserts that pbzip2 is installed, so the test
    environment must provide the binary.

 src/icepack/__init__.py    | 19 +++++----------
 src/icepack/cli.py         |  5 +++-
 src/icepack/helper.py      | 48 ++++++++++++++++++++++++++++++++++++++
 tests/test_helper_bzip2.py | 40 +++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 14 deletions(-)
 create mode 100644 tests/test_helper_bzip2.py

diff --git a/src/icepack/__init__.py b/src/icepack/__init__.py
index 67b4225..f1c3ce0 100644
--- a/src/icepack/__init__.py
+++ b/src/icepack/__init__.py
@@ -1,4 +1,3 @@
-import bz2
 import gzip
 import json
 import os
@@ -9,13 +8,12 @@
 from pydantic import ValidationError
 
 from icepack.error import InvalidArchiveError
-from icepack.helper import Age, File, GZip, SSH, Zip
+from icepack.helper import Age, BZip2, File, GZip, SSH, Zip
 from icepack.meta import SECRET_KEY, PUBLIC_KEY, ALLOWED_SIGNERS
 from icepack.model import Checksum, Compression, Encryption
 from icepack.model import DirEntry, FileEntry, Metadata
 
 
-_BUFFER_SIZE = 64 * 1024
 _MAX_ATTEMPTS = 3
 _METADATA_RE = re.compile(r'^metadata\.(\w+)\.(\w+)$')
 
@@ -95,7 +93,7 @@ def extract_entry(self, entry, base_path):
         tmp_path = self._mktemp()
         self._decrypt_path(age_path, tmp_path)
         age_path.unlink()
-        self._uncompress_path(tmp_path, dst_path, entry.compression)
+        self._decompress_path(tmp_path, dst_path, entry.compression)
         tmp_path.unlink()
         if self._mode and entry.mode is not None:
             dst_path.chmod(entry.mode & 0o7777)
@@ -144,13 +142,10 @@ def _load_metadata(self):
         meta_path.unlink()
         sig_path.unlink()
 
-    def _uncompress_path(self, src_path, dst_path, compression):
-        """Uncompressed src_path to dst_path."""
+    def _decompress_path(self, src_path, dst_path, compression):
+        """Decompress src_path to dst_path."""
         if compression == Compression.BZ2:
-            with open(dst_path, 'wb') as dst:
-                with open(src_path, 'rb') as src_file:
-                    with bz2.open(src_file, 'rb') as src:
-                        copyfileobj(src, dst, _BUFFER_SIZE)
+            BZip2.decompress(src_path, dst_path)
         elif compression == Compression.GZ:
             GZip.decompress(src_path, dst_path)
         else:
@@ -249,9 +244,7 @@ def _compress_path(self, src_path):
         """Return the temporary Path of the compressed src_path."""
         tmp_path = self._mktemp()
         if self._compression == Compression.BZ2:
-            with open(src_path, 'rb') as src:
-                with bz2.open(tmp_path, 'wb') as dst:
-                    copyfileobj(src, dst, _BUFFER_SIZE)
+            BZip2.compress(src_path, tmp_path)
         elif self._compression == Compression.GZ:
             GZip.compress(src_path, tmp_path)
         else:
diff --git a/src/icepack/cli.py b/src/icepack/cli.py
index 85c2f36..3c2ea6a 100644
--- a/src/icepack/cli.py
+++ b/src/icepack/cli.py
@@ -3,7 +3,7 @@
 import click
 
 from icepack import IcepackReader, create_archive, extract_archive
-from icepack.helper import Age, File, GZip, SSH
+from icepack.helper import Age, BZip2, File, GZip, SSH
 from icepack.meta import NAME, VERSION, SECRET_KEY, PUBLIC_KEY
 from icepack.model import Compression
 
@@ -170,6 +170,9 @@ def version(ctx, dependencies):
     pigz_version = GZip.pigz_version()
     if pigz_version:
         click.echo(f'✅ pigz found. (Version: {pigz_version})')
+    pbzip2_version = BZip2.pbzip2_version()
+    if pbzip2_version:
+        click.echo(f'✅ pbzip2 found. (Version: {pbzip2_version})')
 
 
 @icepack.group()
diff --git a/src/icepack/helper.py b/src/icepack/helper.py
index 9140ca3..2f45e23 100644
--- a/src/icepack/helper.py
+++ b/src/icepack/helper.py
@@ -1,3 +1,4 @@
+import bz2
 import gzip
 import hashlib
 import os
@@ -12,6 +13,8 @@
 from icepack.meta import NAME, SECRET_KEY, PUBLIC_KEY, ALLOWED_SIGNERS
 
 
+# If this environment variable is set to 'false', do not use pbzip2.
+PBZIP2_ENV = 'ICEPACK_PBZIP2'
 # If this environment variable is set to 'false', do not use pigz.
 PIGZ_ENV = 'ICEPACK_PIGZ'
 
@@ -184,6 +187,51 @@ def pigz_version():
         return version
 
 
+class BZip2():
+    """bzip2 helpers."""
+    @staticmethod
+    def compress(src_path, dst_path):
+        """Compress src_path to dst_path."""
+        pbzip2_env = os.environ.get(PBZIP2_ENV)
+        if BZip2.has_pbzip2() and pbzip2_env != 'false':
+            with open(dst_path, 'wb') as dst:
+                subprocess.run(  # nosec Trusted input
+                    ['pbzip2', '-c', str(src_path)],
+                    stdout=dst,
+                    check=True)
+        else:
+            with open(src_path, 'rb') as src:
+                with bz2.open(dst_path, 'wb') as dst:
+                    copyfileobj(src, dst, _BUFFER_SIZE)
+
+    @staticmethod
+    def decompress(src_path, dst_path):
+        """Decompress src_path to dst_path."""
+        with bz2.open(src_path, 'rb') as src:
+            with open(dst_path, 'wb') as dst:
+                copyfileobj(src, dst, _BUFFER_SIZE)
+
+    @staticmethod
+    def has_pbzip2():
+        """Return True if pbzip2 is available."""
+        return which('pbzip2') is not None
+
+    @staticmethod
+    def pbzip2_version():
+        """Return the pbzip2 version, or None."""
+        version = None
+        if BZip2.has_pbzip2():
+            result = subprocess.run(  # nosec Trusted input
+                ['pbzip2', '--version'],
+                capture_output=True,
+                text=True,
+                timeout=5)
+            if result.returncode == 0:
+                version = result.stderr.strip()
+                version = version.split('\n')[0]
+        return version
+
+
 class SSH():
     """ssh-keygen helpers."""
 
diff --git a/tests/test_helper_bzip2.py b/tests/test_helper_bzip2.py
new file mode 100644
index 0000000..c267aa7
--- /dev/null
+++ b/tests/test_helper_bzip2.py
@@ -0,0 +1,40 @@
+import os
+
+import pytest
+
+from icepack.helper import PBZIP2_ENV, BZip2, File
+
+from helper import src_path, dst_path
+
+
+@pytest.fixture
+def pbzip2_env():
+    """Preserve the existing PBZIP2_ENV value, if any."""
+    old_value = os.environ.get(PBZIP2_ENV)
+    yield
+    if old_value is not None:
+        os.environ[PBZIP2_ENV] = old_value
+    elif PBZIP2_ENV in os.environ:
+        del os.environ[PBZIP2_ENV]
+
+
+def test_ensure_pbzip2():
+    """Ensure that pbzip2 is available."""
+    assert BZip2.has_pbzip2() is True
+    assert BZip2.pbzip2_version() is not None
+
+
+def test_without_pbzip2(src_path, dst_path, pbzip2_env):
+    """Test compression without pbzip2."""
+    os.environ[PBZIP2_ENV] = 'false'
+    BZip2.compress(src_path / 'foo', dst_path / 'foo.bz2')
+    BZip2.decompress(dst_path / 'foo.bz2', dst_path / 'foo')
+    assert File.sha256(dst_path / 'foo') == 'b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c'  # noqa
+
+
+def test_with_pbzip2(src_path, dst_path, pbzip2_env):
+    """Test compression with pbzip2."""
+    os.environ[PBZIP2_ENV] = 'true'
+    BZip2.compress(src_path / 'foo', dst_path / 'foo.bz2')
+    BZip2.decompress(dst_path / 'foo.bz2', dst_path / 'foo')
+    assert File.sha256(dst_path / 'foo') == 'b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c'  # noqa