From 88af8852db0db8bc71eba0fe2b2b4159fd1054d7 Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Fri, 9 Feb 2024 17:31:01 +0100 Subject: [PATCH 01/12] Refactor machine decompress.go Added some tests to verify that files extractions works with different compression format. Created a decompressor interface with 2 main methods: reader(): returns an io.Reader for the specific compression algorithm copy(): extracts the compressed file into the file provided as param Created 5 decompressor types: - gzip: extract gzip files - xz: extract xz files - zip: extract zip files - generic: extract any other file using github.com/containers/image/v5/pkg/compression - uncompressed: only do a copy of the file Minor fix to the progress bar instances: added a call to bar.Abort(false) that happens before Progress.Wait() to avoid that it hangs when a bar is not set as completed although extraction is done. Signed-off-by: Mario Loriedo --- pkg/machine/compression/compression_test.go | 106 +++++- pkg/machine/compression/decompress.go | 335 ++++-------------- pkg/machine/compression/generic.go | 54 +++ pkg/machine/compression/gzip.go | 56 +++ pkg/machine/compression/testfiles/sample.bz2 | Bin 0 -> 46 bytes pkg/machine/compression/testfiles/sample.gz | Bin 0 -> 31 bytes .../compression/testfiles/sample.uncompressed | 1 + pkg/machine/compression/testfiles/sample.xz | Bin 0 -> 60 bytes pkg/machine/compression/testfiles/sample.zip | Bin 0 -> 164 bytes pkg/machine/compression/testfiles/sample.zst | Bin 0 -> 18 bytes pkg/machine/compression/uncompressed.go | 45 +++ pkg/machine/compression/xz.go | 87 +++++ pkg/machine/compression/zip.go | 57 +++ 13 files changed, 473 insertions(+), 268 deletions(-) create mode 100644 pkg/machine/compression/generic.go create mode 100644 pkg/machine/compression/gzip.go create mode 100644 pkg/machine/compression/testfiles/sample.bz2 create mode 100644 pkg/machine/compression/testfiles/sample.gz create mode 100644 pkg/machine/compression/testfiles/sample.uncompressed create mode 100644 pkg/machine/compression/testfiles/sample.xz create mode 100644 pkg/machine/compression/testfiles/sample.zip create mode 100644 pkg/machine/compression/testfiles/sample.zst create mode 100644 pkg/machine/compression/uncompressed.go create mode 100644 pkg/machine/compression/xz.go create mode 100644 pkg/machine/compression/zip.go diff --git a/pkg/machine/compression/compression_test.go b/pkg/machine/compression/compression_test.go index 4accca18c6..d66e0a07f8 100644 --- a/pkg/machine/compression/compression_test.go +++ b/pkg/machine/compression/compression_test.go @@ -1,6 +1,11 @@ package compression -import "testing" +import ( + "os" + "testing" + + "github.com/containers/podman/v5/pkg/machine/define" +) func Test_compressionFromFile(t *testing.T) { type args struct { @@ -89,3 +94,102 @@ func TestImageCompression_String(t *testing.T) { }) } } + +func Test_Decompress(t *testing.T) { + type args struct { + src string + dst string + } + + type want struct { + content string + } + + tests := []struct { + name string + args args + want want + }{ + { + name: "zip", + args: args{ + src: "./testfiles/sample.zip", + dst: "./testfiles/hellozip", + }, + want: want{ + content: "zip\n", + }, + }, + { + name: "xz", + args: args{ + src: "./testfiles/sample.xz", + dst: "./testfiles/helloxz", + }, + want: want{ + content: "xz\n", + }, + }, + { + name: "gzip", + args: args{ + src: "./testfiles/sample.gz", + dst: "./testfiles/hellogz", + }, + want: want{ + content: "gzip\n", + }, + }, + { + name: "bzip2", + args: args{ + src: "./testfiles/sample.bz2", + dst: "./testfiles/hellobz2", + }, + want: want{ + content: "bzip2\n", + }, + }, + { + name: "zstd", + args: args{ + src: "./testfiles/sample.zst", + dst: "./testfiles/hellozstd", + }, + want: want{ + content: "zstd\n", + }, + }, + { + name: "uncompressed", + args: args{ + src: "./testfiles/sample.uncompressed", + dst: "./testfiles/hellozuncompressed", + }, + want: want{ + content: "uncompressed\n", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + srcVMFile := &define.VMFile{Path: tt.args.src} + dstFilePath := tt.args.dst + + defer os.Remove(dstFilePath) + + if err := Decompress(srcVMFile, dstFilePath); err != nil { + t.Fatalf("decompress() error = %v", err) + } + + data, err := os.ReadFile(dstFilePath) + if err != nil { + t.Fatalf("ReadFile() error = %v", err) + } + + if got := string(data); got != tt.want.content { + t.Fatalf("content = %v, want %v", got, tt.want.content) + } + }) + } +} diff --git a/pkg/machine/compression/decompress.go b/pkg/machine/compression/decompress.go index 5111f5aa96..b8970a3c37 100644 --- a/pkg/machine/compression/decompress.go +++ b/pkg/machine/compression/decompress.go @@ -1,313 +1,114 @@ package compression import ( - "archive/zip" - "bufio" - "compress/gzip" - "errors" - "fmt" "io" "os" - "os/exec" "path/filepath" "runtime" "strings" - "github.com/containers/image/v5/pkg/compression" "github.com/containers/podman/v5/pkg/machine/define" "github.com/containers/podman/v5/utils" "github.com/containers/storage/pkg/archive" - crcOs "github.com/crc-org/crc/v2/pkg/os" - "github.com/klauspost/compress/zstd" "github.com/sirupsen/logrus" - "github.com/ulikunitz/xz" ) -// Decompress is a generic wrapper for various decompression algos -// TODO this needs some love. in the various decompression functions that are -// called, the same uncompressed path is being opened multiple times. -func Decompress(localPath *define.VMFile, uncompressedPath string) error { - var isZip bool - uncompressedFileWriter, err := os.OpenFile(uncompressedPath, os.O_CREATE|os.O_RDWR, 0600) - if err != nil { - return err - } - defer func() { - if err := uncompressedFileWriter.Close(); err != nil && !errors.Is(err, os.ErrClosed) { - logrus.Warnf("unable to close decompressed file %s: %q", uncompressedPath, err) - } - }() - sourceFile, err := localPath.Read() - if err != nil { - return err - } - if strings.HasSuffix(localPath.GetPath(), ".zip") { - isZip = true - } - compressionType := archive.DetectCompression(sourceFile) - - prefix := "Extracting compressed file" - prefix += ": " + filepath.Base(uncompressedPath) - switch compressionType { - case archive.Xz: - return decompressXZ(prefix, localPath.GetPath(), uncompressedFileWriter) - case archive.Uncompressed: - if isZip && runtime.GOOS == "windows" { - return decompressZip(prefix, localPath.GetPath(), uncompressedFileWriter) - } - // here we should just do a copy - dstFile, err := os.Open(localPath.GetPath()) - if err != nil { - return err - } - // darwin really struggles with sparse files. being diligent here - fmt.Printf("Copying uncompressed file %q to %q/n", localPath.GetPath(), dstFile.Name()) - - // Keeping CRC implementation for now, but ideally this could be pruned and - // sparsewriter could be used. in that case, this area needs rework or - // sparsewriter be made to honor the *file interface - _, err = crcOs.CopySparse(uncompressedFileWriter, dstFile) - return err - case archive.Gzip: - if runtime.GOOS == "darwin" { - return decompressGzWithSparse(prefix, localPath, uncompressedFileWriter) - } - fallthrough - case archive.Zstd: - if runtime.GOOS == "darwin" { - return decompressZstdWithSparse(prefix, localPath, uncompressedFileWriter) - } - fallthrough - default: - return decompressEverythingElse(prefix, localPath.GetPath(), uncompressedFileWriter) - } - - // if compressionType != archive.Uncompressed || isZip { - // prefix = "Extracting compressed file" - // } - // prefix += ": " + filepath.Base(uncompressedPath) - // if compressionType == archive.Xz { - // return decompressXZ(prefix, localPath.GetPath(), uncompressedFileWriter) - // } - // if isZip && runtime.GOOS == "windows" { - // return decompressZip(prefix, localPath.GetPath(), uncompressedFileWriter) - // } - - // Unfortunately GZ is not sparse capable. Lets handle it differently - // if compressionType == archive.Gzip && runtime.GOOS == "darwin" { - // return decompressGzWithSparse(prefix, localPath, uncompressedPath) - // } - // return decompressEverythingElse(prefix, localPath.GetPath(), uncompressedFileWriter) -} - -// Will error out if file without .Xz already exists -// Maybe extracting then renaming is a good idea here.. -// depends on Xz: not pre-installed on mac, so it becomes a brew dependency -func decompressXZ(prefix string, src string, output io.WriteCloser) error { - var read io.Reader - var cmd *exec.Cmd - - stat, err := os.Stat(src) - if err != nil { - return err - } - file, err := os.Open(src) - if err != nil { - return err - } - defer file.Close() - - p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done") - proxyReader := bar.ProxyReader(file) - defer func() { - if err := proxyReader.Close(); err != nil { - logrus.Error(err) - } - }() - - // Prefer Xz utils for fastest performance, fallback to go xi2 impl - if _, err := exec.LookPath("xz"); err == nil { - cmd = exec.Command("xz", "-d", "-c") - cmd.Stdin = proxyReader - read, err = cmd.StdoutPipe() - if err != nil { - return err - } - cmd.Stderr = os.Stderr - } else { - // This XZ implementation is reliant on buffering. It is also 3x+ slower than XZ utils. - // Consider replacing with a faster implementation (e.g. xi2) if podman machine is - // updated with a larger image for the distribution base. - buf := bufio.NewReader(proxyReader) - read, err = xz.NewReader(buf) - if err != nil { - return err - } - } - - done := make(chan bool) - go func() { - if _, err := io.Copy(output, read); err != nil { - logrus.Error(err) - } - output.Close() - done <- true - }() +const ( + zipExt = ".zip" + progressBarPrefix = "Extracting compressed file" + macOs = "darwin" +) - if cmd != nil { - err := cmd.Start() - if err != nil { - return err - } - p.Wait() - return cmd.Wait() - } - <-done - p.Wait() - return nil +type decompressor interface { + srcFilePath() string + reader() (io.Reader, error) + copy(w *os.File, r io.Reader) error + close() } -func decompressEverythingElse(prefix string, src string, output io.WriteCloser) error { - stat, err := os.Stat(src) - if err != nil { - return err - } - f, err := os.Open(src) - if err != nil { - return err - } - p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done") - proxyReader := bar.ProxyReader(f) - defer func() { - if err := proxyReader.Close(); err != nil { - logrus.Error(err) - } - }() - uncompressStream, _, err := compression.AutoDecompress(proxyReader) - if err != nil { - return err +func newDecompressor(compressedFilePath string, compressedFileContent []byte) decompressor { + compressionType := archive.DetectCompression(compressedFileContent) + os := runtime.GOOS + hasZipSuffix := strings.HasSuffix(compressedFilePath, zipExt) + + switch { + case compressionType == archive.Xz: + return newXzDecompressor(compressedFilePath) + case compressionType == archive.Uncompressed && hasZipSuffix: + return newZipDecompressor(compressedFilePath) + case compressionType == archive.Uncompressed: + return newUncompressedDecompressor(compressedFilePath) + case compressionType == archive.Gzip && os == macOs: + return newGzipDecompressor(compressedFilePath) + default: + return newGenericDecompressor(compressedFilePath) } - defer func() { - if err := uncompressStream.Close(); err != nil { - logrus.Error(err) - } - if err := output.Close(); err != nil { - logrus.Error(err) - } - }() - - _, err = io.Copy(output, uncompressStream) - p.Wait() - return err } -func decompressZip(prefix string, src string, output io.WriteCloser) error { - zipReader, err := zip.OpenReader(src) - if err != nil { - return err - } - if len(zipReader.File) != 1 { - return errors.New("machine image files should consist of a single compressed file") - } - f, err := zipReader.File[0].Open() +func Decompress(srcVMFile *define.VMFile, dstFilePath string) error { + srcFilePath := srcVMFile.GetPath() + // Are we reading full image file? + // Only few bytes are read to detect + // the compression type + srcFileContent, err := srcVMFile.Read() if err != nil { return err } - defer func() { - if err := f.Close(); err != nil { - logrus.Error(err) - } - }() - defer func() { - if err := output.Close(); err != nil { - logrus.Error(err) - } - }() - size := int64(zipReader.File[0].CompressedSize64) - p, bar := utils.ProgressBar(prefix, size, prefix+": done") - proxyReader := bar.ProxyReader(f) - defer func() { - if err := proxyReader.Close(); err != nil { - logrus.Error(err) - } - }() - _, err = io.Copy(output, proxyReader) - p.Wait() - return err -} - -func decompressWithSparse(prefix string, compressedReader io.Reader, uncompressedFile *os.File) error { - dstFile := NewSparseWriter(uncompressedFile) - defer func() { - if err := dstFile.Close(); err != nil { - logrus.Errorf("unable to close uncompressed file %s: %q", uncompressedFile.Name(), err) - } - }() - // TODO remove the following line when progress bars work - _ = prefix - // p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done") - // proxyReader := bar.ProxyReader(f) - // defer func() { - // if err := proxyReader.Close(); err != nil { - // logrus.Error(err) - // } - // }() - - // p.Wait() - _, err := io.Copy(dstFile, compressedReader) - return err + d := newDecompressor(srcFilePath, srcFileContent) + return runDecompression(d, dstFilePath) } -func decompressGzWithSparse(prefix string, compressedPath *define.VMFile, uncompressedFileWriter *os.File) error { - logrus.Debugf("decompressing %s", compressedPath.GetPath()) - f, err := os.Open(compressedPath.GetPath()) +func runDecompression(d decompressor, dstFilePath string) error { + decompressorReader, err := d.reader() if err != nil { return err } - defer func() { - if err := f.Close(); err != nil { - logrus.Errorf("unable to close on compressed file %s: %q", compressedPath.GetPath(), err) - } - }() + defer d.close() - gzReader, err := gzip.NewReader(f) + stat, err := os.Stat(d.srcFilePath()) if err != nil { return err } - defer func() { - if err := gzReader.Close(); err != nil { - logrus.Errorf("unable to close gzreader: %q", err) - } - }() - // This way we get something to look at in debug mode - defer func() { - logrus.Debug("decompression complete") - }() - return decompressWithSparse(prefix, gzReader, uncompressedFileWriter) -} -func decompressZstdWithSparse(prefix string, compressedPath *define.VMFile, uncompressedFileWriter *os.File) error { - logrus.Debugf("decompressing %s", compressedPath.GetPath()) - f, err := os.Open(compressedPath.GetPath()) + initMsg := progressBarPrefix + ": " + filepath.Base(dstFilePath) + finalMsg := initMsg + ": done" + + // We are getting the compressed file size but + // the progress bar needs the full size of the + // decompressed file. + // As a result the progress bar shows 100% + // before the decompression completes. + // A workaround is to set the size to -1 but the + // side effect is that we won't see any advancment in + // the bar. + // An update in utils.ProgressBar to handle is needed + // to improve the case of size=-1 (i.e. unkwonw size). + p, bar := utils.ProgressBar(initMsg, stat.Size(), finalMsg) + // Wait for bars to complete and then shut down the bars container + defer p.Wait() + + readProxy := bar.ProxyReader(decompressorReader) + // Interrupts the bar goroutine. It's important that + // bar.Abort(false) is called before p.Wait(), otherwise + // can hang. + defer bar.Abort(false) + + dstFileWriter, err := os.OpenFile(dstFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, stat.Mode()) if err != nil { + logrus.Errorf("Unable to open destination file %s for writing: %q", dstFilePath, err) return err } defer func() { - if err := f.Close(); err != nil { - logrus.Errorf("unable to close on compressed file %s: %q", compressedPath.GetPath(), err) + if err := dstFileWriter.Close(); err != nil { + logrus.Errorf("Unable to to close destination file %s: %q", dstFilePath, err) } }() - zstdReader, err := zstd.NewReader(f) + err = d.copy(dstFileWriter, readProxy) if err != nil { + logrus.Errorf("Error extracting compressed file: %q", err) return err } - defer zstdReader.Close() - - // This way we get something to look at in debug mode - defer func() { - logrus.Debug("decompression complete") - }() - return decompressWithSparse(prefix, zstdReader, uncompressedFileWriter) + return nil } diff --git a/pkg/machine/compression/generic.go b/pkg/machine/compression/generic.go new file mode 100644 index 0000000000..85e7ee7b6c --- /dev/null +++ b/pkg/machine/compression/generic.go @@ -0,0 +1,54 @@ +package compression + +import ( + "io" + "os" + + "github.com/containers/image/v5/pkg/compression" + "github.com/sirupsen/logrus" +) + +type genericDecompressor struct { + compressedFilePath string + compressedFile *os.File + uncompressStream io.ReadCloser +} + +func newGenericDecompressor(compressedFilePath string) decompressor { + return &genericDecompressor{ + compressedFilePath: compressedFilePath, + } +} + +func (d *genericDecompressor) srcFilePath() string { + return d.compressedFilePath +} + +func (d *genericDecompressor) reader() (io.Reader, error) { + srcFile, err := os.Open(d.compressedFilePath) + if err != nil { + return nil, err + } + d.compressedFile = srcFile + return srcFile, nil +} + +func (d *genericDecompressor) copy(w *os.File, r io.Reader) error { + uncompressStream, _, err := compression.AutoDecompress(r) + if err != nil { + return err + } + d.uncompressStream = uncompressStream + + _, err = io.Copy(w, uncompressStream) + return err +} + +func (d *genericDecompressor) close() { + if err := d.compressedFile.Close(); err != nil { + logrus.Errorf("Unable to close compressed file: %q", err) + } + if err := d.uncompressStream.Close(); err != nil { + logrus.Errorf("Unable to close uncompressed stream: %q", err) + } +} diff --git a/pkg/machine/compression/gzip.go b/pkg/machine/compression/gzip.go new file mode 100644 index 0000000000..799a0eda01 --- /dev/null +++ b/pkg/machine/compression/gzip.go @@ -0,0 +1,56 @@ +package compression + +import ( + "compress/gzip" + "io" + "os" + + crcOs "github.com/crc-org/crc/v2/pkg/os" + "github.com/sirupsen/logrus" +) + +type gzDecompressor struct { + compressedFilePath string + compressedFile *os.File + gzReader *gzip.Reader +} + +func newGzipDecompressor(compressedFilePath string) decompressor { + return &gzDecompressor{ + compressedFilePath: compressedFilePath, + } +} + +func (d *gzDecompressor) srcFilePath() string { + return d.compressedFilePath +} + +func (d *gzDecompressor) reader() (io.Reader, error) { + srcFile, err := os.Open(d.compressedFilePath) + if err != nil { + return nil, err + } + d.compressedFile = srcFile + + gzReader, err := gzip.NewReader(srcFile) + if err != nil { + return gzReader, err + } + d.gzReader = gzReader + + return gzReader, nil +} + +func (*gzDecompressor) copy(w *os.File, r io.Reader) error { + _, err := crcOs.CopySparse(w, r) + return err +} + +func (d *gzDecompressor) close() { + if err := d.compressedFile.Close(); err != nil { + logrus.Errorf("Unable to close gz file: %q", err) + } + if err := d.gzReader.Close(); err != nil { + logrus.Errorf("Unable to close gz file: %q", err) + } +} diff --git a/pkg/machine/compression/testfiles/sample.bz2 b/pkg/machine/compression/testfiles/sample.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..499e27f3186a1120ecc6c439a5519892f8637b38 GIT binary patch literal 46 zcmZ>Y%CIzaj8qGbe1G3Rh=GC8vw=ZCfI&dPK|p~)Ng~65H9{ixq{u@j=l!*|9snKd B4153p literal 0 HcmV?d00001 diff --git a/pkg/machine/compression/testfiles/sample.gz b/pkg/machine/compression/testfiles/sample.gz new file mode 100644 index 0000000000000000000000000000000000000000..7936e092f1db92de972b4edba094a1f7285aa663 GIT binary patch literal 31 mcmb2|=HQT%I+V)5oRON7lh5G2{*=xGCWiktafz%93=9C1SqVY_ literal 0 HcmV?d00001 diff --git a/pkg/machine/compression/testfiles/sample.uncompressed b/pkg/machine/compression/testfiles/sample.uncompressed new file mode 100644 index 0000000000..edfd10ef0d --- /dev/null +++ b/pkg/machine/compression/testfiles/sample.uncompressed @@ -0,0 +1 @@ +uncompressed diff --git a/pkg/machine/compression/testfiles/sample.xz b/pkg/machine/compression/testfiles/sample.xz new file mode 100644 index 0000000000000000000000000000000000000000..d791191d35e666f593bc72444d036982af4d2bfb GIT binary patch literal 60 zcmexsUKJ6=z`*kC+7>q^21Q0O1_p)_{ill=8JH@nxEL5_+~dgF_t@B-fl->7TYsm3 P{I<`vj6gLEERj(FAKVb3 literal 0 HcmV?d00001 diff --git a/pkg/machine/compression/testfiles/sample.zip b/pkg/machine/compression/testfiles/sample.zip new file mode 100644 index 0000000000000000000000000000000000000000..481aeadc5260f128ca9c1c3bb9cbef07f4be5841 GIT binary patch literal 164 zcmWIWW@h1H0D&jTZV`+BYprDgvO$=YL53kCH76%OG=!6Zxmw~-Y7G#VR&X;gvV3I( zsu2Mys>&?j3h-uRl4HhYhy+j-0|QV!!;(f23u+`Q#7H#b0=!w-K#CZF&>KiQgE$NT Ds}vm! literal 0 HcmV?d00001 diff --git a/pkg/machine/compression/testfiles/sample.zst b/pkg/machine/compression/testfiles/sample.zst new file mode 100644 index 0000000000000000000000000000000000000000..8fbd12afb0b0285f11c8dadb513704af9ef7ae04 GIT binary patch literal 18 ZcmdPcs{dDoRg;0Cs< Date: Mon, 19 Feb 2024 12:19:40 +0100 Subject: [PATCH 02/12] Use github.com/stretchr/testify assert in compression_test.go Signed-off-by: Mario Loriedo --- pkg/machine/compression/compression_test.go | 88 +++------------------ 1 file changed, 12 insertions(+), 76 deletions(-) diff --git a/pkg/machine/compression/compression_test.go b/pkg/machine/compression/compression_test.go index d66e0a07f8..a353d80044 100644 --- a/pkg/machine/compression/compression_test.go +++ b/pkg/machine/compression/compression_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/containers/podman/v5/pkg/machine/define" + "github.com/stretchr/testify/assert" ) func Test_compressionFromFile(t *testing.T) { @@ -101,95 +102,30 @@ func Test_Decompress(t *testing.T) { dst string } - type want struct { - content string - } + type want string tests := []struct { name string args args want want }{ - { - name: "zip", - args: args{ - src: "./testfiles/sample.zip", - dst: "./testfiles/hellozip", - }, - want: want{ - content: "zip\n", - }, - }, - { - name: "xz", - args: args{ - src: "./testfiles/sample.xz", - dst: "./testfiles/helloxz", - }, - want: want{ - content: "xz\n", - }, - }, - { - name: "gzip", - args: args{ - src: "./testfiles/sample.gz", - dst: "./testfiles/hellogz", - }, - want: want{ - content: "gzip\n", - }, - }, - { - name: "bzip2", - args: args{ - src: "./testfiles/sample.bz2", - dst: "./testfiles/hellobz2", - }, - want: want{ - content: "bzip2\n", - }, - }, - { - name: "zstd", - args: args{ - src: "./testfiles/sample.zst", - dst: "./testfiles/hellozstd", - }, - want: want{ - content: "zstd\n", - }, - }, - { - name: "uncompressed", - args: args{ - src: "./testfiles/sample.uncompressed", - dst: "./testfiles/hellozuncompressed", - }, - want: want{ - content: "uncompressed\n", - }, - }, + {name: "zip", args: args{src: "./testfiles/sample.zip", dst: "./testfiles/hellozip"}, want: "zip\n"}, + {name: "xz", args: args{src: "./testfiles/sample.xz", dst: "./testfiles/helloxz"}, want: "xz\n"}, + {name: "gzip", args: args{src: "./testfiles/sample.gz", dst: "./testfiles/hellogz"}, want: "gzip\n"}, + {name: "bzip2", args: args{src: "./testfiles/sample.bz2", dst: "./testfiles/hellobz2"}, want: "bzip2\n"}, + {name: "zstd", args: args{src: "./testfiles/sample.zst", dst: "./testfiles/hellozstd"}, want: "zstd\n"}, + {name: "uncompressed", args: args{src: "./testfiles/sample.uncompressed", dst: "./testfiles/hellozuncompressed"}, want: "uncompressed\n"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { srcVMFile := &define.VMFile{Path: tt.args.src} dstFilePath := tt.args.dst - defer os.Remove(dstFilePath) - - if err := Decompress(srcVMFile, dstFilePath); err != nil { - t.Fatalf("decompress() error = %v", err) - } - + err := Decompress(srcVMFile, dstFilePath) + assert.NoError(t, err) data, err := os.ReadFile(dstFilePath) - if err != nil { - t.Fatalf("ReadFile() error = %v", err) - } - - if got := string(data); got != tt.want.content { - t.Fatalf("content = %v, want %v", got, tt.want.content) - } + assert.NoError(t, err) + assert.Equal(t, string(tt.want), string(data)) }) } } From c42d3a74edc512349afd7bad1abb0e35a5273ac3 Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Mon, 19 Feb 2024 12:38:05 +0100 Subject: [PATCH 03/12] Add a comment to explain why we look at file name for zip files Signed-off-by: Mario Loriedo --- pkg/machine/compression/decompress.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/machine/compression/decompress.go b/pkg/machine/compression/decompress.go index b8970a3c37..ca3986f2d9 100644 --- a/pkg/machine/compression/decompress.go +++ b/pkg/machine/compression/decompress.go @@ -34,6 +34,8 @@ func newDecompressor(compressedFilePath string, compressedFileContent []byte) de switch { case compressionType == archive.Xz: return newXzDecompressor(compressedFilePath) + // Zip files are not guaranteed to have a magic number at the beginning + // of the file, so we need to use the file name to detect them. case compressionType == archive.Uncompressed && hasZipSuffix: return newZipDecompressor(compressedFilePath) case compressionType == archive.Uncompressed: From 2245cf8dc43ddcba58faf6b4f7ea500345e0961c Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Tue, 20 Feb 2024 13:56:12 +0100 Subject: [PATCH 04/12] Remove duplication and make consistent usage of the progress bar Signed-off-by: Mario Loriedo --- pkg/machine/compression/decompress.go | 93 ++++++++++++------------- pkg/machine/compression/generic.go | 50 ++++++++----- pkg/machine/compression/gzip.go | 44 ++++-------- pkg/machine/compression/uncompressed.go | 34 ++------- pkg/machine/compression/xz.go | 31 ++------- pkg/machine/compression/zip.go | 26 ++++--- 6 files changed, 111 insertions(+), 167 deletions(-) diff --git a/pkg/machine/compression/decompress.go b/pkg/machine/compression/decompress.go index ca3986f2d9..639dc1a3f3 100644 --- a/pkg/machine/compression/decompress.go +++ b/pkg/machine/compression/decompress.go @@ -1,6 +1,7 @@ package compression import ( + "errors" "io" "os" "path/filepath" @@ -14,19 +15,39 @@ import ( ) const ( - zipExt = ".zip" - progressBarPrefix = "Extracting compressed file" - macOs = "darwin" + decompressedFileFlag = os.O_CREATE | os.O_TRUNC | os.O_WRONLY + macOs = "darwin" + progressBarPrefix = "Extracting compressed file" + zipExt = ".zip" ) type decompressor interface { - srcFilePath() string - reader() (io.Reader, error) - copy(w *os.File, r io.Reader) error + compressedFileSize() int64 + compressedFileMode() os.FileMode + compressedFileReader() (io.ReadCloser, error) + decompress(w io.WriteSeeker, r io.Reader) error close() } -func newDecompressor(compressedFilePath string, compressedFileContent []byte) decompressor { +func Decompress(compressedVMFile *define.VMFile, decompressedFilePath string) error { + compressedFilePath := compressedVMFile.GetPath() + // Are we reading full image file? + // Only few bytes are read to detect + // the compression type + compressedFileContent, err := compressedVMFile.Read() + if err != nil { + return err + } + + var d decompressor + if d, err = newDecompressor(compressedFilePath, compressedFileContent); err != nil { + return err + } + + return runDecompression(d, decompressedFilePath) +} + +func newDecompressor(compressedFilePath string, compressedFileContent []byte) (decompressor, error) { compressionType := archive.DetectCompression(compressedFileContent) os := runtime.GOOS hasZipSuffix := strings.HasSuffix(compressedFilePath, zipExt) @@ -40,6 +61,10 @@ func newDecompressor(compressedFilePath string, compressedFileContent []byte) de return newZipDecompressor(compressedFilePath) case compressionType == archive.Uncompressed: return newUncompressedDecompressor(compressedFilePath) + // macOS gzipped VM images are sparse. As a result a + // special decompressor is required: it uses crc os.CopySparse + // instead of io.Copy and std lib gzip instead of klauspost/pgzip + // (even if it's slower). case compressionType == archive.Gzip && os == macOs: return newGzipDecompressor(compressedFilePath) default: @@ -47,70 +72,42 @@ func newDecompressor(compressedFilePath string, compressedFileContent []byte) de } } -func Decompress(srcVMFile *define.VMFile, dstFilePath string) error { - srcFilePath := srcVMFile.GetPath() - // Are we reading full image file? - // Only few bytes are read to detect - // the compression type - srcFileContent, err := srcVMFile.Read() - if err != nil { - return err - } - - d := newDecompressor(srcFilePath, srcFileContent) - return runDecompression(d, dstFilePath) -} - -func runDecompression(d decompressor, dstFilePath string) error { - decompressorReader, err := d.reader() +func runDecompression(d decompressor, decompressedFilePath string) error { + compressedFileReader, err := d.compressedFileReader() if err != nil { return err } defer d.close() - stat, err := os.Stat(d.srcFilePath()) - if err != nil { - return err - } - - initMsg := progressBarPrefix + ": " + filepath.Base(dstFilePath) + initMsg := progressBarPrefix + ": " + filepath.Base(decompressedFilePath) finalMsg := initMsg + ": done" - // We are getting the compressed file size but - // the progress bar needs the full size of the - // decompressed file. - // As a result the progress bar shows 100% - // before the decompression completes. - // A workaround is to set the size to -1 but the - // side effect is that we won't see any advancment in - // the bar. - // An update in utils.ProgressBar to handle is needed - // to improve the case of size=-1 (i.e. unkwonw size). - p, bar := utils.ProgressBar(initMsg, stat.Size(), finalMsg) + p, bar := utils.ProgressBar(initMsg, d.compressedFileSize(), finalMsg) // Wait for bars to complete and then shut down the bars container defer p.Wait() - readProxy := bar.ProxyReader(decompressorReader) + compressedFileReaderProxy := bar.ProxyReader(compressedFileReader) // Interrupts the bar goroutine. It's important that // bar.Abort(false) is called before p.Wait(), otherwise // can hang. defer bar.Abort(false) - dstFileWriter, err := os.OpenFile(dstFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, stat.Mode()) - if err != nil { - logrus.Errorf("Unable to open destination file %s for writing: %q", dstFilePath, err) + var decompressedFileWriter *os.File + + if decompressedFileWriter, err = os.OpenFile(decompressedFilePath, decompressedFileFlag, d.compressedFileMode()); err != nil { + logrus.Errorf("Unable to open destination file %s for writing: %q", decompressedFilePath, err) return err } defer func() { - if err := dstFileWriter.Close(); err != nil { - logrus.Errorf("Unable to to close destination file %s: %q", dstFilePath, err) + if err := decompressedFileWriter.Close(); err != nil && !errors.Is(err, os.ErrClosed) { + logrus.Warnf("Unable to to close destination file %s: %q", decompressedFilePath, err) } }() - err = d.copy(dstFileWriter, readProxy) - if err != nil { + if err = d.decompress(decompressedFileWriter, compressedFileReaderProxy); err != nil { logrus.Errorf("Error extracting compressed file: %q", err) return err } + return nil } diff --git a/pkg/machine/compression/generic.go b/pkg/machine/compression/generic.go index 85e7ee7b6c..9488e0184f 100644 --- a/pkg/machine/compression/generic.go +++ b/pkg/machine/compression/generic.go @@ -2,6 +2,7 @@ package compression import ( "io" + "io/fs" "os" "github.com/containers/image/v5/pkg/compression" @@ -9,38 +10,48 @@ import ( ) type genericDecompressor struct { - compressedFilePath string - compressedFile *os.File - uncompressStream io.ReadCloser + compressedFilePath string + compressedFile *os.File + decompressedFileReader io.ReadCloser + compressedFileInfo os.FileInfo } -func newGenericDecompressor(compressedFilePath string) decompressor { - return &genericDecompressor{ - compressedFilePath: compressedFilePath, +func newGenericDecompressor(compressedFilePath string) (*genericDecompressor, error) { + d := &genericDecompressor{} + d.compressedFilePath = compressedFilePath + stat, err := os.Stat(d.compressedFilePath) + if err != nil { + return nil, err } + d.compressedFileInfo = stat + return d, nil } -func (d *genericDecompressor) srcFilePath() string { - return d.compressedFilePath +func (d *genericDecompressor) compressedFileSize() int64 { + return d.compressedFileInfo.Size() } -func (d *genericDecompressor) reader() (io.Reader, error) { - srcFile, err := os.Open(d.compressedFilePath) +func (d *genericDecompressor) compressedFileMode() fs.FileMode { + return d.compressedFileInfo.Mode() +} + +func (d *genericDecompressor) compressedFileReader() (io.ReadCloser, error) { + compressedFile, err := os.Open(d.compressedFilePath) if err != nil { return nil, err } - d.compressedFile = srcFile - return srcFile, nil + d.compressedFile = compressedFile + return compressedFile, nil } -func (d *genericDecompressor) copy(w *os.File, r io.Reader) error { - uncompressStream, _, err := compression.AutoDecompress(r) +func (d *genericDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { + decompressedFileReader, _, err := compression.AutoDecompress(r) if err != nil { return err } - d.uncompressStream = uncompressStream + d.decompressedFileReader = decompressedFileReader - _, err = io.Copy(w, uncompressStream) + _, err = io.Copy(w, decompressedFileReader) return err } @@ -48,7 +59,10 @@ func (d *genericDecompressor) close() { if err := d.compressedFile.Close(); err != nil { logrus.Errorf("Unable to close compressed file: %q", err) } - if err := d.uncompressStream.Close(); err != nil { - logrus.Errorf("Unable to close uncompressed stream: %q", err) + + if d.decompressedFileReader != nil { + if err := d.decompressedFileReader.Close(); err != nil { + logrus.Errorf("Unable to close uncompressed stream: %q", err) + } } } diff --git a/pkg/machine/compression/gzip.go b/pkg/machine/compression/gzip.go index 799a0eda01..4e3130a4f9 100644 --- a/pkg/machine/compression/gzip.go +++ b/pkg/machine/compression/gzip.go @@ -3,54 +3,34 @@ package compression import ( "compress/gzip" "io" - "os" crcOs "github.com/crc-org/crc/v2/pkg/os" "github.com/sirupsen/logrus" ) -type gzDecompressor struct { - compressedFilePath string - compressedFile *os.File - gzReader *gzip.Reader +type gzipDecompressor struct { + genericDecompressor + gzReader io.ReadCloser } -func newGzipDecompressor(compressedFilePath string) decompressor { - return &gzDecompressor{ - compressedFilePath: compressedFilePath, - } -} - -func (d *gzDecompressor) srcFilePath() string { - return d.compressedFilePath +func newGzipDecompressor(compressedFilePath string) (*gzipDecompressor, error) { + d, err := newGenericDecompressor(compressedFilePath) + return &gzipDecompressor{*d, nil}, err } -func (d *gzDecompressor) reader() (io.Reader, error) { - srcFile, err := os.Open(d.compressedFilePath) - if err != nil { - return nil, err - } - d.compressedFile = srcFile - - gzReader, err := gzip.NewReader(srcFile) +func (d *gzipDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { + gzReader, err := gzip.NewReader(r) if err != nil { - return gzReader, err + return err } d.gzReader = gzReader - - return gzReader, nil -} - -func (*gzDecompressor) copy(w *os.File, r io.Reader) error { - _, err := crcOs.CopySparse(w, r) + _, err = crcOs.CopySparse(w, gzReader) return err } -func (d *gzDecompressor) close() { - if err := d.compressedFile.Close(); err != nil { - logrus.Errorf("Unable to close gz file: %q", err) - } +func (d *gzipDecompressor) close() { if err := d.gzReader.Close(); err != nil { logrus.Errorf("Unable to close gz file: %q", err) } + d.genericDecompressor.close() } diff --git a/pkg/machine/compression/uncompressed.go b/pkg/machine/compression/uncompressed.go index 2796a8cbbe..4a3e0729c8 100644 --- a/pkg/machine/compression/uncompressed.go +++ b/pkg/machine/compression/uncompressed.go @@ -2,44 +2,20 @@ package compression import ( "io" - "os" crcOs "github.com/crc-org/crc/v2/pkg/os" - "github.com/sirupsen/logrus" ) type uncompressedDecompressor struct { - compressedFilePath string - compressedFile *os.File + genericDecompressor } -func newUncompressedDecompressor(compressedFilePath string) decompressor { - return &uncompressedDecompressor{ - compressedFilePath: compressedFilePath, - } +func newUncompressedDecompressor(compressedFilePath string) (*uncompressedDecompressor, error) { + d, err := newGenericDecompressor(compressedFilePath) + return &uncompressedDecompressor{*d}, err } -func (d *uncompressedDecompressor) srcFilePath() string { - return d.compressedFilePath -} - -func (d *uncompressedDecompressor) reader() (io.Reader, error) { - srcFile, err := os.Open(d.compressedFilePath) - if err != nil { - return nil, err - } - d.compressedFile = srcFile - - return srcFile, nil -} - -func (*uncompressedDecompressor) copy(w *os.File, r io.Reader) error { +func (*uncompressedDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { _, err := crcOs.CopySparse(w, r) return err } - -func (d *uncompressedDecompressor) close() { - if err := d.compressedFile.Close(); err != nil { - logrus.Errorf("Unable to close gz file: %q", err) - } -} diff --git a/pkg/machine/compression/xz.go b/pkg/machine/compression/xz.go index 2353db96a4..d4f492d88e 100644 --- a/pkg/machine/compression/xz.go +++ b/pkg/machine/compression/xz.go @@ -11,33 +11,18 @@ import ( ) type xzDecompressor struct { - compressedFilePath string - compressedFile *os.File + genericDecompressor } -func newXzDecompressor(compressedFilePath string) decompressor { - return &xzDecompressor{ - compressedFilePath: compressedFilePath, - } -} - -func (d *xzDecompressor) srcFilePath() string { - return d.compressedFilePath -} - -func (d *xzDecompressor) reader() (io.Reader, error) { - srcFile, err := os.Open(d.compressedFilePath) - if err != nil { - return nil, err - } - d.compressedFile = srcFile - return srcFile, nil +func newXzDecompressor(compressedFilePath string) (*xzDecompressor, error) { + d, err := newGenericDecompressor(compressedFilePath) + return &xzDecompressor{*d}, err } // Will error out if file without .Xz already exists // Maybe extracting then renaming is a good idea here.. // depends on Xz: not pre-installed on mac, so it becomes a brew dependency -func (*xzDecompressor) copy(w *os.File, r io.Reader) error { +func (*xzDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { var cmd *exec.Cmd var read io.Reader @@ -79,9 +64,3 @@ func (*xzDecompressor) copy(w *os.File, r io.Reader) error { <-done return nil } - -func (d *xzDecompressor) close() { - if err := d.compressedFile.Close(); err != nil { - logrus.Errorf("Unable to close xz file: %q", err) - } -} diff --git a/pkg/machine/compression/zip.go b/pkg/machine/compression/zip.go index fdc3cc30fc..7de1f4813c 100644 --- a/pkg/machine/compression/zip.go +++ b/pkg/machine/compression/zip.go @@ -4,28 +4,26 @@ import ( "archive/zip" "errors" "io" - "os" "github.com/sirupsen/logrus" ) type zipDecompressor struct { - compressedFilePath string - zipReader *zip.ReadCloser - fileReader io.ReadCloser + genericDecompressor + zipReader *zip.ReadCloser + fileReader io.ReadCloser } -func newZipDecompressor(compressedFilePath string) decompressor { - return &zipDecompressor{ - compressedFilePath: compressedFilePath, - } -} - -func (d *zipDecompressor) srcFilePath() string { - return d.compressedFilePath +func newZipDecompressor(compressedFilePath string) (*zipDecompressor, error) { + d, err := newGenericDecompressor(compressedFilePath) + return &zipDecompressor{*d, nil, nil}, err } -func (d *zipDecompressor) reader() (io.Reader, error) { +// This is the only compressor that doesn't return the compressed file +// stream (zip.OpenReader() provides access to the decompressed file). +// As a result the progress bar shows the decompressed file stream +// but the final size is the compressed file size. +func (d *zipDecompressor) compressedFileReader() (io.ReadCloser, error) { zipReader, err := zip.OpenReader(d.compressedFilePath) if err != nil { return nil, err @@ -42,7 +40,7 @@ func (d *zipDecompressor) reader() (io.Reader, error) { return z, nil } -func (*zipDecompressor) copy(w *os.File, r io.Reader) error { +func (*zipDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { _, err := io.Copy(w, r) return err } From 0b3f3f0ef10c7a1f41a8baf8257a82d6a4e3a823 Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Wed, 21 Feb 2024 16:02:59 +0100 Subject: [PATCH 05/12] Use faster gzip reader Signed-off-by: Mario Loriedo --- pkg/machine/compression/compression_test.go | 5 +++-- pkg/machine/compression/decompress.go | 2 +- pkg/machine/compression/generic.go | 2 +- pkg/machine/compression/gzip.go | 11 ++++++----- pkg/machine/compression/uncompressed.go | 2 +- pkg/machine/compression/xz.go | 2 +- pkg/machine/compression/zip.go | 2 +- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pkg/machine/compression/compression_test.go b/pkg/machine/compression/compression_test.go index a353d80044..b24b4ba49d 100644 --- a/pkg/machine/compression/compression_test.go +++ b/pkg/machine/compression/compression_test.go @@ -6,6 +6,7 @@ import ( "github.com/containers/podman/v5/pkg/machine/define" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func Test_compressionFromFile(t *testing.T) { @@ -122,9 +123,9 @@ func Test_Decompress(t *testing.T) { dstFilePath := tt.args.dst defer os.Remove(dstFilePath) err := Decompress(srcVMFile, dstFilePath) - assert.NoError(t, err) + require.NoError(t, err) data, err := os.ReadFile(dstFilePath) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, string(tt.want), string(data)) }) } diff --git a/pkg/machine/compression/decompress.go b/pkg/machine/compression/decompress.go index 639dc1a3f3..3aa8f3dccb 100644 --- a/pkg/machine/compression/decompress.go +++ b/pkg/machine/compression/decompress.go @@ -25,7 +25,7 @@ type decompressor interface { compressedFileSize() int64 compressedFileMode() os.FileMode compressedFileReader() (io.ReadCloser, error) - decompress(w io.WriteSeeker, r io.Reader) error + decompress(w WriteSeekCloser, r io.Reader) error close() } diff --git a/pkg/machine/compression/generic.go b/pkg/machine/compression/generic.go index 9488e0184f..d8fb9b25fb 100644 --- a/pkg/machine/compression/generic.go +++ b/pkg/machine/compression/generic.go @@ -44,7 +44,7 @@ func (d *genericDecompressor) compressedFileReader() (io.ReadCloser, error) { return compressedFile, nil } -func (d *genericDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { +func (d *genericDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { decompressedFileReader, _, err := compression.AutoDecompress(r) if err != nil { return err diff --git a/pkg/machine/compression/gzip.go b/pkg/machine/compression/gzip.go index 4e3130a4f9..cd12ea0dea 100644 --- a/pkg/machine/compression/gzip.go +++ b/pkg/machine/compression/gzip.go @@ -1,10 +1,9 @@ package compression import ( - "compress/gzip" "io" - crcOs "github.com/crc-org/crc/v2/pkg/os" + image "github.com/containers/image/v5/pkg/compression" "github.com/sirupsen/logrus" ) @@ -18,13 +17,15 @@ func newGzipDecompressor(compressedFilePath string) (*gzipDecompressor, error) { return &gzipDecompressor{*d, nil}, err } -func (d *gzipDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { - gzReader, err := gzip.NewReader(r) +func (d *gzipDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { + gzReader, err := image.GzipDecompressor(r) if err != nil { return err } d.gzReader = gzReader - _, err = crcOs.CopySparse(w, gzReader) + + sparseWriter := NewSparseWriter(w) + _, err = io.Copy(sparseWriter, gzReader) return err } diff --git a/pkg/machine/compression/uncompressed.go b/pkg/machine/compression/uncompressed.go index 4a3e0729c8..847c59d343 100644 --- a/pkg/machine/compression/uncompressed.go +++ b/pkg/machine/compression/uncompressed.go @@ -15,7 +15,7 @@ func newUncompressedDecompressor(compressedFilePath string) (*uncompressedDecomp return &uncompressedDecompressor{*d}, err } -func (*uncompressedDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { +func (*uncompressedDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { _, err := crcOs.CopySparse(w, r) return err } diff --git a/pkg/machine/compression/xz.go b/pkg/machine/compression/xz.go index d4f492d88e..867181a55e 100644 --- a/pkg/machine/compression/xz.go +++ b/pkg/machine/compression/xz.go @@ -22,7 +22,7 @@ func newXzDecompressor(compressedFilePath string) (*xzDecompressor, error) { // Will error out if file without .Xz already exists // Maybe extracting then renaming is a good idea here.. // depends on Xz: not pre-installed on mac, so it becomes a brew dependency -func (*xzDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { +func (*xzDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { var cmd *exec.Cmd var read io.Reader diff --git a/pkg/machine/compression/zip.go b/pkg/machine/compression/zip.go index 7de1f4813c..bd251b5d88 100644 --- a/pkg/machine/compression/zip.go +++ b/pkg/machine/compression/zip.go @@ -40,7 +40,7 @@ func (d *zipDecompressor) compressedFileReader() (io.ReadCloser, error) { return z, nil } -func (*zipDecompressor) decompress(w io.WriteSeeker, r io.Reader) error { +func (*zipDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { _, err := io.Copy(w, r) return err } From fa99b9be93ba55a6e5d630967934365e05ad8052 Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Wed, 21 Feb 2024 16:28:55 +0100 Subject: [PATCH 06/12] renamed testfiles as testdata Signed-off-by: Mario Loriedo --- pkg/machine/compression/compression_test.go | 12 ++++++------ .../compression/{testfiles => testdata}/sample.bz2 | Bin .../compression/{testfiles => testdata}/sample.gz | Bin .../{testfiles => testdata}/sample.uncompressed | 0 .../compression/{testfiles => testdata}/sample.xz | Bin .../compression/{testfiles => testdata}/sample.zip | Bin .../compression/{testfiles => testdata}/sample.zst | Bin 7 files changed, 6 insertions(+), 6 deletions(-) rename pkg/machine/compression/{testfiles => testdata}/sample.bz2 (100%) rename pkg/machine/compression/{testfiles => testdata}/sample.gz (100%) rename pkg/machine/compression/{testfiles => testdata}/sample.uncompressed (100%) rename pkg/machine/compression/{testfiles => testdata}/sample.xz (100%) rename pkg/machine/compression/{testfiles => testdata}/sample.zip (100%) rename pkg/machine/compression/{testfiles => testdata}/sample.zst (100%) diff --git a/pkg/machine/compression/compression_test.go b/pkg/machine/compression/compression_test.go index b24b4ba49d..b41a9077c4 100644 --- a/pkg/machine/compression/compression_test.go +++ b/pkg/machine/compression/compression_test.go @@ -110,12 +110,12 @@ func Test_Decompress(t *testing.T) { args args want want }{ - {name: "zip", args: args{src: "./testfiles/sample.zip", dst: "./testfiles/hellozip"}, want: "zip\n"}, - {name: "xz", args: args{src: "./testfiles/sample.xz", dst: "./testfiles/helloxz"}, want: "xz\n"}, - {name: "gzip", args: args{src: "./testfiles/sample.gz", dst: "./testfiles/hellogz"}, want: "gzip\n"}, - {name: "bzip2", args: args{src: "./testfiles/sample.bz2", dst: "./testfiles/hellobz2"}, want: "bzip2\n"}, - {name: "zstd", args: args{src: "./testfiles/sample.zst", dst: "./testfiles/hellozstd"}, want: "zstd\n"}, - {name: "uncompressed", args: args{src: "./testfiles/sample.uncompressed", dst: "./testfiles/hellozuncompressed"}, want: "uncompressed\n"}, + {name: "zip", args: args{src: "./testdata/sample.zip", dst: "./testdata/hellozip"}, want: "zip\n"}, + {name: "xz", args: args{src: "./testdata/sample.xz", dst: "./testdata/helloxz"}, want: "xz\n"}, + {name: "gzip", args: args{src: "./testdata/sample.gz", dst: "./testdata/hellogz"}, want: "gzip\n"}, + {name: "bzip2", args: args{src: "./testdata/sample.bz2", dst: "./testdata/hellobz2"}, want: "bzip2\n"}, + {name: "zstd", args: args{src: "./testdata/sample.zst", dst: "./testdata/hellozstd"}, want: "zstd\n"}, + {name: "uncompressed", args: args{src: "./testdata/sample.uncompressed", dst: "./testdata/hellozuncompressed"}, want: "uncompressed\n"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/pkg/machine/compression/testfiles/sample.bz2 b/pkg/machine/compression/testdata/sample.bz2 similarity index 100% rename from pkg/machine/compression/testfiles/sample.bz2 rename to pkg/machine/compression/testdata/sample.bz2 diff --git a/pkg/machine/compression/testfiles/sample.gz b/pkg/machine/compression/testdata/sample.gz similarity index 100% rename from pkg/machine/compression/testfiles/sample.gz rename to pkg/machine/compression/testdata/sample.gz diff --git a/pkg/machine/compression/testfiles/sample.uncompressed b/pkg/machine/compression/testdata/sample.uncompressed similarity index 100% rename from pkg/machine/compression/testfiles/sample.uncompressed rename to pkg/machine/compression/testdata/sample.uncompressed diff --git a/pkg/machine/compression/testfiles/sample.xz b/pkg/machine/compression/testdata/sample.xz similarity index 100% rename from pkg/machine/compression/testfiles/sample.xz rename to pkg/machine/compression/testdata/sample.xz diff --git a/pkg/machine/compression/testfiles/sample.zip b/pkg/machine/compression/testdata/sample.zip similarity index 100% rename from pkg/machine/compression/testfiles/sample.zip rename to pkg/machine/compression/testdata/sample.zip diff --git a/pkg/machine/compression/testfiles/sample.zst b/pkg/machine/compression/testdata/sample.zst similarity index 100% rename from pkg/machine/compression/testfiles/sample.zst rename to pkg/machine/compression/testdata/sample.zst From ea4553d5904005e78ee76cec448f2fda10370a03 Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Wed, 21 Feb 2024 17:27:39 +0100 Subject: [PATCH 07/12] integrating changes from #21768 Signed-off-by: Mario Loriedo --- pkg/machine/compression/decompress.go | 8 +++--- pkg/machine/compression/uncompressed.go | 7 +++-- pkg/machine/compression/zstd.go | 34 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 8 deletions(-) create mode 100644 pkg/machine/compression/zstd.go diff --git a/pkg/machine/compression/decompress.go b/pkg/machine/compression/decompress.go index 3aa8f3dccb..fb37d99b13 100644 --- a/pkg/machine/compression/decompress.go +++ b/pkg/machine/compression/decompress.go @@ -61,12 +61,12 @@ func newDecompressor(compressedFilePath string, compressedFileContent []byte) (d return newZipDecompressor(compressedFilePath) case compressionType == archive.Uncompressed: return newUncompressedDecompressor(compressedFilePath) - // macOS gzipped VM images are sparse. As a result a - // special decompressor is required: it uses crc os.CopySparse - // instead of io.Copy and std lib gzip instead of klauspost/pgzip - // (even if it's slower). + // Using special compressors on MacOS because default ones + // in c/image/pkg/compression are slow with sparse files. case compressionType == archive.Gzip && os == macOs: return newGzipDecompressor(compressedFilePath) + case compressionType == archive.Zstd && os == macOs: + return newZstdDecompressor(compressedFilePath) default: return newGenericDecompressor(compressedFilePath) } diff --git a/pkg/machine/compression/uncompressed.go b/pkg/machine/compression/uncompressed.go index 847c59d343..03ebfb4a41 100644 --- a/pkg/machine/compression/uncompressed.go +++ b/pkg/machine/compression/uncompressed.go @@ -2,8 +2,6 @@ package compression import ( "io" - - crcOs "github.com/crc-org/crc/v2/pkg/os" ) type uncompressedDecompressor struct { @@ -15,7 +13,8 @@ func newUncompressedDecompressor(compressedFilePath string) (*uncompressedDecomp return &uncompressedDecompressor{*d}, err } -func (*uncompressedDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { - _, err := crcOs.CopySparse(w, r) +func (d *uncompressedDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { + sparseWriter := NewSparseWriter(w) + _, err := io.Copy(sparseWriter, r) return err } diff --git a/pkg/machine/compression/zstd.go b/pkg/machine/compression/zstd.go new file mode 100644 index 0000000000..bf234961ea --- /dev/null +++ b/pkg/machine/compression/zstd.go @@ -0,0 +1,34 @@ +package compression + +import ( + "io" + + "github.com/klauspost/compress/zstd" +) + +type zstdDecompressor struct { + genericDecompressor + zstdReader *zstd.Decoder +} + +func newZstdDecompressor(compressedFilePath string) (*zstdDecompressor, error) { + d, err := newGenericDecompressor(compressedFilePath) + return &zstdDecompressor{*d, nil}, err +} + +func (d *zstdDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { + zstdReader, err := zstd.NewReader(r) + if err != nil { + return err + } + d.zstdReader = zstdReader + + sparseWriter := NewSparseWriter(w) + _, err = io.Copy(sparseWriter, zstdReader) + return err +} + +func (d *zstdDecompressor) close() { + d.zstdReader.Close() + d.genericDecompressor.close() +} From f099250beb34b4041e47eb27a3d4aa5b88a47b4e Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Sun, 25 Feb 2024 21:04:56 +0100 Subject: [PATCH 08/12] Better file close and err handling Signed-off-by: Mario Loriedo --- pkg/machine/compression/compression_test.go | 8 ++- pkg/machine/compression/generic.go | 19 +++---- pkg/machine/compression/gzip.go | 22 ++++---- .../compression/testdata/gen-testdata.sh | 51 ++++++++++++++++++ .../compression/testdata/sample-withzeros.bz2 | Bin 0 -> 49 bytes .../compression/testdata/sample-withzeros.gz | Bin 0 -> 47 bytes .../testdata/sample-withzeros.uncompressed | Bin 0 -> 20 bytes .../compression/testdata/sample-withzeros.xz | Bin 0 -> 68 bytes .../compression/testdata/sample-withzeros.zip | Bin 0 -> 194 bytes .../compression/testdata/sample-withzeros.zst | Bin 0 -> 23 bytes pkg/machine/compression/testdata/sample.gz | Bin 31 -> 35 bytes pkg/machine/compression/testdata/sample.zip | Bin 164 -> 170 bytes pkg/machine/compression/uncompressed.go | 8 +++ pkg/machine/compression/zstd.go | 17 +++--- 14 files changed, 95 insertions(+), 30 deletions(-) create mode 100755 pkg/machine/compression/testdata/gen-testdata.sh create mode 100644 pkg/machine/compression/testdata/sample-withzeros.bz2 create mode 100644 pkg/machine/compression/testdata/sample-withzeros.gz create mode 100644 pkg/machine/compression/testdata/sample-withzeros.uncompressed create mode 100644 pkg/machine/compression/testdata/sample-withzeros.xz create mode 100644 pkg/machine/compression/testdata/sample-withzeros.zip create mode 100644 pkg/machine/compression/testdata/sample-withzeros.zst diff --git a/pkg/machine/compression/compression_test.go b/pkg/machine/compression/compression_test.go index b41a9077c4..9c51cd8bca 100644 --- a/pkg/machine/compression/compression_test.go +++ b/pkg/machine/compression/compression_test.go @@ -111,11 +111,17 @@ func Test_Decompress(t *testing.T) { want want }{ {name: "zip", args: args{src: "./testdata/sample.zip", dst: "./testdata/hellozip"}, want: "zip\n"}, + {name: "zip with trailing zeros", args: args{src: "./testdata/sample-withzeros.zip", dst: "./testdata/hellozip-withzeros"}, want: "zip\n\x00\x00\x00\x00\x00\x00"}, {name: "xz", args: args{src: "./testdata/sample.xz", dst: "./testdata/helloxz"}, want: "xz\n"}, + {name: "xz with trailing zeros", args: args{src: "./testdata/sample-withzeros.xz", dst: "./testdata/helloxz-withzeros"}, want: "xz\n\x00\x00\x00\x00\x00\x00\x00"}, {name: "gzip", args: args{src: "./testdata/sample.gz", dst: "./testdata/hellogz"}, want: "gzip\n"}, + {name: "gzip with trailing zeros", args: args{src: "./testdata/sample-withzeros.gz", dst: "./testdata/hellogzip-withzeros"}, want: "gzip\n\x00\x00\x00\x00\x00"}, {name: "bzip2", args: args{src: "./testdata/sample.bz2", dst: "./testdata/hellobz2"}, want: "bzip2\n"}, + {name: "bzip2 with trailing zeros", args: args{src: "./testdata/sample-withzeros.bz2", dst: "./testdata/hellobz2-withzeros"}, want: "bzip2\n\x00\x00\x00\x00"}, {name: "zstd", args: args{src: "./testdata/sample.zst", dst: "./testdata/hellozstd"}, want: "zstd\n"}, - {name: "uncompressed", args: args{src: "./testdata/sample.uncompressed", dst: "./testdata/hellozuncompressed"}, want: "uncompressed\n"}, + {name: "zstd with trailing zeros", args: args{src: "./testdata/sample-withzeros.zst", dst: "./testdata/hellozstd-withzeros"}, want: "zstd\n\x00\x00\x00\x00\x00"}, + {name: "uncompressed", args: args{src: "./testdata/sample.uncompressed", dst: "./testdata/hellouncompressed"}, want: "uncompressed\n"}, + {name: "uncompressed with trailing zeros", args: args{src: "./testdata/sample-withzeros.uncompressed", dst: "./testdata/hellozuncompressed-withzeros"}, want: "uncompressed\n\x00\x00\x00\x00\x00\x00\x00"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/pkg/machine/compression/generic.go b/pkg/machine/compression/generic.go index d8fb9b25fb..7f808c2d67 100644 --- a/pkg/machine/compression/generic.go +++ b/pkg/machine/compression/generic.go @@ -10,10 +10,9 @@ import ( ) type genericDecompressor struct { - compressedFilePath string - compressedFile *os.File - decompressedFileReader io.ReadCloser - compressedFileInfo os.FileInfo + compressedFilePath string + compressedFile *os.File + compressedFileInfo os.FileInfo } func newGenericDecompressor(compressedFilePath string) (*genericDecompressor, error) { @@ -49,7 +48,11 @@ func (d *genericDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { if err != nil { return err } - d.decompressedFileReader = decompressedFileReader + defer func() { + if err := decompressedFileReader.Close(); err != nil { + logrus.Errorf("Unable to close gz file: %q", err) + } + }() _, err = io.Copy(w, decompressedFileReader) return err @@ -59,10 +62,4 @@ func (d *genericDecompressor) close() { if err := d.compressedFile.Close(); err != nil { logrus.Errorf("Unable to close compressed file: %q", err) } - - if d.decompressedFileReader != nil { - if err := d.decompressedFileReader.Close(); err != nil { - logrus.Errorf("Unable to close uncompressed stream: %q", err) - } - } } diff --git a/pkg/machine/compression/gzip.go b/pkg/machine/compression/gzip.go index cd12ea0dea..f00830dc76 100644 --- a/pkg/machine/compression/gzip.go +++ b/pkg/machine/compression/gzip.go @@ -9,12 +9,11 @@ import ( type gzipDecompressor struct { genericDecompressor - gzReader io.ReadCloser } func newGzipDecompressor(compressedFilePath string) (*gzipDecompressor, error) { d, err := newGenericDecompressor(compressedFilePath) - return &gzipDecompressor{*d, nil}, err + return &gzipDecompressor{*d}, err } func (d *gzipDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { @@ -22,16 +21,19 @@ func (d *gzipDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { if err != nil { return err } - d.gzReader = gzReader + defer func() { + if err := gzReader.Close(); err != nil { + logrus.Errorf("Unable to close gz file: %q", err) + } + }() sparseWriter := NewSparseWriter(w) + defer func() { + if err := sparseWriter.Close(); err != nil { + logrus.Errorf("Unable to close uncompressed file: %q", err) + } + }() + _, err = io.Copy(sparseWriter, gzReader) return err } - -func (d *gzipDecompressor) close() { - if err := d.gzReader.Close(); err != nil { - logrus.Errorf("Unable to close gz file: %q", err) - } - d.genericDecompressor.close() -} diff --git a/pkg/machine/compression/testdata/gen-testdata.sh b/pkg/machine/compression/testdata/gen-testdata.sh new file mode 100755 index 0000000000..375b039e19 --- /dev/null +++ b/pkg/machine/compression/testdata/gen-testdata.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +echo "zstd" > hellozstd-withzeros && \ +truncate -c -s 10 hellozstd-withzeros && \ +zstd -f --sparse hellozstd-withzeros -o sample-withzeros.zst && \ +rm hellozstd-withzeros + +echo "zstd" > hellozstd && \ +zstd -f --sparse hellozstd -o sample.zst && \ +rm hellozstd + +echo "gzip" > hellogzip-withzeros && \ +truncate -c -s 10 hellogzip-withzeros && \ +gzip -f -c hellogzip-withzeros > sample-withzeros.gz && \ +rm hellogzip-withzeros + +echo "gzip" > hellogzip && \ +gzip -f -c hellogzip > sample.gz && \ +rm hellogzip + +echo "bzip2" > hellobzip2-withzeros && \ +truncate -c -s 10 hellobzip2-withzeros && \ +bzip2 -f -c hellobzip2-withzeros > sample-withzeros.bz2 && \ +rm hellobzip2-withzeros + +echo "bzip2" > hellobzip2 && \ +bzip2 -f -c hellobzip2 > sample.bz2 && \ +rm hellobzip2 + +echo "uncompressed" > sample-withzeros.uncompressed && \ +truncate -c -s 20 sample-withzeros.uncompressed + +echo "uncompressed" > sample.uncompressed + +echo "xz" > helloxz-withzeros && \ +truncate -c -s 10 helloxz-withzeros && \ +xz -f -z -c helloxz-withzeros > sample-withzeros.xz && \ +rm helloxz-withzeros + +echo "xz" > helloxz && \ +xz -f -z -c helloxz > sample.xz && \ +rm helloxz + +echo "zip" > hellozip-withzeros && \ +truncate -c -s 10 hellozip-withzeros && \ +zip sample-withzeros.zip hellozip-withzeros && \ +rm hellozip-withzeros + +echo "zip" > hellozip && \ +zip sample.zip hellozip && \ +rm hellozip diff --git a/pkg/machine/compression/testdata/sample-withzeros.bz2 b/pkg/machine/compression/testdata/sample-withzeros.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..782a681c6ca6ef41916345e58e82945091feaac6 GIT binary patch literal 49 zcmZ>Y%CIzaj8qGbbWD1FhJk_kWP^i%0E2*ngMb2qArHf4FdS)f~q^21Q0O1_p)_{ill=88|DdxPTle;8keiZF#$WF$1F#m#Eak Pbop(cYZ*auERj(FT3QgU literal 0 HcmV?d00001 diff --git a/pkg/machine/compression/testdata/sample-withzeros.zip b/pkg/machine/compression/testdata/sample-withzeros.zip new file mode 100644 index 0000000000000000000000000000000000000000..778730a2f603b5832635f7997f7bedc2659fce84 GIT binary patch literal 194 zcmWIWW@Zs#U|`^2D4h@y!Pdg~iv!5x0%9Qs8HSA1oSgis%mUr=%#w_%)S~?2&=5`r z<~+q)sUTch!Og(P@|BT+0c^(VQ#ucl7?>FXycwC~m~ojZ0W=!|mNbHBY<93h>_9U= Tz?+o~q=OL%BY<=Wh{FH?jaetv literal 0 HcmV?d00001 diff --git a/pkg/machine/compression/testdata/sample-withzeros.zst b/pkg/machine/compression/testdata/sample-withzeros.zst new file mode 100644 index 0000000000000000000000000000000000000000..6fac8c6631a34230cf50f40142f9d0e3ced77e8d GIT binary patch literal 23 ccmdPcs{dDoE0BSqs<FdS-{}E{*=xGCWiktafz%93=9C#LknvF literal 31 mcmb2|=HQT%I+V)5oRON7lh5G2{*=xGCWiktafz%93=9C1SqVY_ diff --git a/pkg/machine/compression/testdata/sample.zip b/pkg/machine/compression/testdata/sample.zip index 481aeadc5260f128ca9c1c3bb9cbef07f4be5841..a03bcd4bb0322a260884af73cb3daef645844a85 100644 GIT binary patch delta 105 zcmZ3&xQfvzz?+$civa{mCqzUn{;#!`1;_?r4h9*9jMSW*{Hn}?&=5`r<~+q)sX#nY eUqKKF5u)2wLGF*N@Gu(iffdK$$F&R?; literal 164 zcmWIWW@h1H0D&jTZV`+BYprDgvO$=YL53kCH76%OG=!6Zxmw~-Y7G#VR&X;gvV3I( zsu2Mys>&?j3h-uRl4HhYhy+j-0|QV!!;(f23u+`Q#7H#b0=!w-K#CZF&>KiQgE$NT Ds}vm! diff --git a/pkg/machine/compression/uncompressed.go b/pkg/machine/compression/uncompressed.go index 03ebfb4a41..8bb5ab43a5 100644 --- a/pkg/machine/compression/uncompressed.go +++ b/pkg/machine/compression/uncompressed.go @@ -2,6 +2,8 @@ package compression import ( "io" + + "github.com/sirupsen/logrus" ) type uncompressedDecompressor struct { @@ -15,6 +17,12 @@ func newUncompressedDecompressor(compressedFilePath string) (*uncompressedDecomp func (d *uncompressedDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { sparseWriter := NewSparseWriter(w) + defer func() { + if err := sparseWriter.Close(); err != nil { + logrus.Errorf("Unable to close uncompressed file: %q", err) + } + }() + _, err := io.Copy(sparseWriter, r) return err } diff --git a/pkg/machine/compression/zstd.go b/pkg/machine/compression/zstd.go index bf234961ea..be3b1fd825 100644 --- a/pkg/machine/compression/zstd.go +++ b/pkg/machine/compression/zstd.go @@ -4,16 +4,16 @@ import ( "io" "github.com/klauspost/compress/zstd" + "github.com/sirupsen/logrus" ) type zstdDecompressor struct { genericDecompressor - zstdReader *zstd.Decoder } func newZstdDecompressor(compressedFilePath string) (*zstdDecompressor, error) { d, err := newGenericDecompressor(compressedFilePath) - return &zstdDecompressor{*d, nil}, err + return &zstdDecompressor{*d}, err } func (d *zstdDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { @@ -21,14 +21,15 @@ func (d *zstdDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { if err != nil { return err } - d.zstdReader = zstdReader + defer zstdReader.Close() sparseWriter := NewSparseWriter(w) + defer func() { + if err := sparseWriter.Close(); err != nil { + logrus.Errorf("Unable to close uncompressed file: %q", err) + } + }() + _, err = io.Copy(sparseWriter, zstdReader) return err } - -func (d *zstdDecompressor) close() { - d.zstdReader.Close() - d.genericDecompressor.close() -} From 24a33a538cf42c4e7e4d329f243d90bfefe6f614 Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Tue, 27 Feb 2024 15:59:56 +0100 Subject: [PATCH 09/12] Move code for sparse optimized copy to a dedicated method Signed-off-by: Mario Loriedo --- pkg/machine/compression/generic.go | 10 ++++++++++ pkg/machine/compression/gzip.go | 10 +--------- pkg/machine/compression/uncompressed.go | 12 +----------- pkg/machine/compression/zstd.go | 11 +---------- 4 files changed, 13 insertions(+), 30 deletions(-) diff --git a/pkg/machine/compression/generic.go b/pkg/machine/compression/generic.go index 7f808c2d67..118a914fe9 100644 --- a/pkg/machine/compression/generic.go +++ b/pkg/machine/compression/generic.go @@ -63,3 +63,13 @@ func (d *genericDecompressor) close() { logrus.Errorf("Unable to close compressed file: %q", err) } } + +func (d *genericDecompressor) sparseOptimizedCopy(w WriteSeekCloser, r io.Reader) error { + var err error + sparseWriter := NewSparseWriter(w) + defer func() { + err = sparseWriter.Close() + }() + _, err = io.Copy(sparseWriter, r) + return err +} diff --git a/pkg/machine/compression/gzip.go b/pkg/machine/compression/gzip.go index f00830dc76..1df7a5f471 100644 --- a/pkg/machine/compression/gzip.go +++ b/pkg/machine/compression/gzip.go @@ -27,13 +27,5 @@ func (d *gzipDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { } }() - sparseWriter := NewSparseWriter(w) - defer func() { - if err := sparseWriter.Close(); err != nil { - logrus.Errorf("Unable to close uncompressed file: %q", err) - } - }() - - _, err = io.Copy(sparseWriter, gzReader) - return err + return d.sparseOptimizedCopy(w, gzReader) } diff --git a/pkg/machine/compression/uncompressed.go b/pkg/machine/compression/uncompressed.go index 8bb5ab43a5..dd1a17b66d 100644 --- a/pkg/machine/compression/uncompressed.go +++ b/pkg/machine/compression/uncompressed.go @@ -2,8 +2,6 @@ package compression import ( "io" - - "github.com/sirupsen/logrus" ) type uncompressedDecompressor struct { @@ -16,13 +14,5 @@ func newUncompressedDecompressor(compressedFilePath string) (*uncompressedDecomp } func (d *uncompressedDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { - sparseWriter := NewSparseWriter(w) - defer func() { - if err := sparseWriter.Close(); err != nil { - logrus.Errorf("Unable to close uncompressed file: %q", err) - } - }() - - _, err := io.Copy(sparseWriter, r) - return err + return d.sparseOptimizedCopy(w, r) } diff --git a/pkg/machine/compression/zstd.go b/pkg/machine/compression/zstd.go index be3b1fd825..33718a8035 100644 --- a/pkg/machine/compression/zstd.go +++ b/pkg/machine/compression/zstd.go @@ -4,7 +4,6 @@ import ( "io" "github.com/klauspost/compress/zstd" - "github.com/sirupsen/logrus" ) type zstdDecompressor struct { @@ -23,13 +22,5 @@ func (d *zstdDecompressor) decompress(w WriteSeekCloser, r io.Reader) error { } defer zstdReader.Close() - sparseWriter := NewSparseWriter(w) - defer func() { - if err := sparseWriter.Close(); err != nil { - logrus.Errorf("Unable to close uncompressed file: %q", err) - } - }() - - _, err = io.Copy(sparseWriter, zstdReader) - return err + return d.sparseOptimizedCopy(w, zstdReader) } From 59704665aea5a21d877a7c2e70c845a6f22a6690 Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Tue, 27 Feb 2024 16:16:44 +0100 Subject: [PATCH 10/12] Avoid overriding io.Copy error Signed-off-by: Mario Loriedo --- pkg/machine/compression/generic.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/machine/compression/generic.go b/pkg/machine/compression/generic.go index 118a914fe9..775af78b44 100644 --- a/pkg/machine/compression/generic.go +++ b/pkg/machine/compression/generic.go @@ -68,7 +68,10 @@ func (d *genericDecompressor) sparseOptimizedCopy(w WriteSeekCloser, r io.Reader var err error sparseWriter := NewSparseWriter(w) defer func() { - err = sparseWriter.Close() + e := sparseWriter.Close() + if err != nil { + err = e + } }() _, err = io.Copy(sparseWriter, r) return err From 7b6d9a586e7857c10299d2ca1281272a4afaadec Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Tue, 27 Feb 2024 16:48:47 +0100 Subject: [PATCH 11/12] Fixup: avoid overriding io.Copy error Signed-off-by: Mario Loriedo --- pkg/machine/compression/generic.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/machine/compression/generic.go b/pkg/machine/compression/generic.go index 775af78b44..236a18ea33 100644 --- a/pkg/machine/compression/generic.go +++ b/pkg/machine/compression/generic.go @@ -69,7 +69,7 @@ func (d *genericDecompressor) sparseOptimizedCopy(w WriteSeekCloser, r io.Reader sparseWriter := NewSparseWriter(w) defer func() { e := sparseWriter.Close() - if err != nil { + if e != nil && err == nil { err = e } }() From 8f02822c12646079ac618922ab18f75f7fe2232e Mon Sep 17 00:00:00 2001 From: Mario Loriedo Date: Wed, 28 Feb 2024 15:37:15 +0100 Subject: [PATCH 12/12] Remove xz unit tests as they are flaky Signed-off-by: Mario Loriedo --- pkg/machine/compression/compression_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/machine/compression/compression_test.go b/pkg/machine/compression/compression_test.go index 9c51cd8bca..8230746972 100644 --- a/pkg/machine/compression/compression_test.go +++ b/pkg/machine/compression/compression_test.go @@ -112,8 +112,6 @@ func Test_Decompress(t *testing.T) { }{ {name: "zip", args: args{src: "./testdata/sample.zip", dst: "./testdata/hellozip"}, want: "zip\n"}, {name: "zip with trailing zeros", args: args{src: "./testdata/sample-withzeros.zip", dst: "./testdata/hellozip-withzeros"}, want: "zip\n\x00\x00\x00\x00\x00\x00"}, - {name: "xz", args: args{src: "./testdata/sample.xz", dst: "./testdata/helloxz"}, want: "xz\n"}, - {name: "xz with trailing zeros", args: args{src: "./testdata/sample-withzeros.xz", dst: "./testdata/helloxz-withzeros"}, want: "xz\n\x00\x00\x00\x00\x00\x00\x00"}, {name: "gzip", args: args{src: "./testdata/sample.gz", dst: "./testdata/hellogz"}, want: "gzip\n"}, {name: "gzip with trailing zeros", args: args{src: "./testdata/sample-withzeros.gz", dst: "./testdata/hellogzip-withzeros"}, want: "gzip\n\x00\x00\x00\x00\x00"}, {name: "bzip2", args: args{src: "./testdata/sample.bz2", dst: "./testdata/hellobz2"}, want: "bzip2\n"},