Skip to content

Commit

Permalink
Merge pull request #132 from paketo-buildpacks/extract-archive
Browse files Browse the repository at this point in the history
Consolidate extract file methods
  • Loading branch information
Daniel Mikusa authored Mar 16, 2022
2 parents 8cb61a9 + 8979ae3 commit c4a5099
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 4 deletions.
82 changes: 78 additions & 4 deletions crush/crush.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package crush
import (
"archive/tar"
"archive/zip"
"bytes"
"compress/bzip2"
"compress/gzip"
"fmt"
Expand All @@ -27,6 +28,7 @@ import (
"path/filepath"
"strings"

"github.com/h2non/filetype"
"github.com/xi2/xz"
)

Expand Down Expand Up @@ -101,9 +103,63 @@ func CreateTarGz(destination io.Writer, source string) error {
return CreateTar(gz, source)
}

// Extract decompresses and extracts source to a destination directory or path. The content type is detected from the
// leading bytes of source:
//
//   - TAR and ZIP archives are unpacked into destination;
//   - BZIP2, GZIP and XZ streams are decompressed and the decompressed payload is passed through Extract again;
//   - anything else is written verbatim to a file created at destination.
//
// For archives, an arbitrary number of top-level directory components can be stripped from each path.
func Extract(source io.Reader, destination string, stripComponents int) error {
	// The TAR magic sits at offset 257, and a single Read on a decompressor or network stream is allowed to
	// return fewer bytes than that. Fill the sniff buffer with ReadFull so detection does not depend on how
	// the underlying reader chunks its data. Hitting EOF early just means the input is shorter than the buffer.
	header := make([]byte, 8192)
	n, err := io.ReadFull(source, header)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return fmt.Errorf("unable to read header\n%w", err)
	}

	// The buffer is handed over zero-padded past n; only the n bytes actually read are replayed below.
	kind, err := filetype.Match(header)
	if err != nil {
		return err
	}

	// Put the sniffed bytes back in front of the unread remainder of the stream.
	source = io.MultiReader(bytes.NewReader(header[:n]), source)

	switch kind.MIME.Value {
	case "application/x-tar":
		return extractTar(source, destination, stripComponents)
	case "application/zip":
		return extractZip(source, destination, stripComponents)
	case "application/x-bzip2":
		return Extract(bzip2.NewReader(source), destination, stripComponents)
	case "application/gzip":
		gz, err := gzip.NewReader(source)
		if err != nil {
			return fmt.Errorf("unable to create GZIP reader\n%w", err)
		}
		defer gz.Close()
		return Extract(gz, destination, stripComponents)
	case "application/x-xz":
		// Named xzr so the xz package identifier is not shadowed.
		xzr, err := xz.NewReader(source, 0)
		if err != nil {
			return fmt.Errorf("unable to create XZ reader\n%w", err)
		}
		return Extract(xzr, destination, stripComponents)
	}

	// Not an archive: this happens with xz/gzip/bz2 when the compressed file is not an archive, so the
	// content is written to destination as a single file.
	out, err := os.Create(destination)
	if err != nil {
		return fmt.Errorf("unable to open %s\n%w", destination, err)
	}

	if _, err := io.Copy(out, source); err != nil {
		_ = out.Close() // the copy failure is the error worth reporting
		return fmt.Errorf("unable to copy to %s\n%w", destination, err)
	}

	// Close explicitly: a failed close on a freshly written file can mean the data never reached disk.
	if err := out.Close(); err != nil {
		return fmt.Errorf("unable to close %s\n%w", destination, err)
	}

	return nil
}

// ExtractTar extracts the TAR stream read from source into the destination directory. An arbitrary number of
// top-level directory components can be stripped from each path.
//
// Deprecated: use Extract instead.
func ExtractTar(source io.Reader, destination string, stripComponents int) error {
// Exported shim kept for existing callers; the work is done by the unexported extractTar, which Extract also uses.
return extractTar(source, destination, stripComponents)
}

func extractTar(source io.Reader, destination string, stripComponents int) error {
t := tar.NewReader(source)

for {
Expand Down Expand Up @@ -140,12 +196,16 @@ func ExtractTar(source io.Reader, destination string, stripComponents int) error

// ExtractTarBz2 extracts the BZIP2-compressed TAR stream read from source into the destination directory. An
// arbitrary number of top-level directory components can be stripped from each path.
//
// Deprecated: use Extract instead.
func ExtractTarBz2(source io.Reader, destination string, stripComponents int) error {
// Wrap source in a BZIP2 decompressor and hand the decompressed stream to the TAR extractor.
return ExtractTar(bzip2.NewReader(source), destination, stripComponents)
}

// ExtractTarGz extracts source GZIP'd TAR file to a destination directory. An arbitrary number of top-level directory
// components can be stripped from each path.
//
// Deprecated: use Extract instead
func ExtractTarGz(source io.Reader, destination string, stripComponents int) error {
gz, err := gzip.NewReader(source)
if err != nil {
Expand All @@ -158,6 +218,8 @@ func ExtractTarGz(source io.Reader, destination string, stripComponents int) err

// ExtractTarXz extracts source XZ'd TAR file to a destination directory. An arbitrary number of top-level directory
// components can be stripped from each path.
//
// Deprecated: use Extract instead
func ExtractTarXz(source io.Reader, destination string, stripComponents int) error {
xz, err := xz.NewReader(source, 0)
if err != nil {
Expand All @@ -169,13 +231,25 @@ func ExtractTarXz(source io.Reader, destination string, stripComponents int) err

// ExtractZip extracts source ZIP file to a destination directory. An arbitrary number of top-level directory
// components can be stripped from each path.
func ExtractZip(source *os.File, destination string, stripComponents int) error {
stat, err := source.Stat()
//
// Deprecated: use Extract instead
func ExtractZip(source io.Reader, destination string, stripComponents int) error {
// Exported shim kept for existing callers; the unexported extractZip copies source to a temporary file
// and opens the ZIP from there, so any io.Reader is accepted.
return extractZip(source, destination, stripComponents)
}

func extractZip(source io.Reader, destination string, stripComponents int) error {
buffer, err := os.CreateTemp("", "")
if err != nil {
return fmt.Errorf("unable to stat %s\n%w", source.Name(), err)
return err
}
defer os.Remove(buffer.Name())

size, err := io.Copy(buffer, source)
if err != nil {
return err
}

z, err := zip.NewReader(source, stat.Size())
z, err := zip.NewReader(buffer, size)
if err != nil {
return err
}
Expand Down
157 changes: 157 additions & 0 deletions crush/crush_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,5 +223,162 @@ func testCrush(t *testing.T, context spec.G, it spec.S) {
Expect(filepath.Join(path, "fileC.txt")).To(BeARegularFile())
})
})

context("Extract", func() {
	// Each archive fixture is expected to contain fileA.txt at the top level plus fileB.txt and fileC.txt
	// under dirA, so the same two specs are generated for every format. Driving them from a table keeps
	// the formats in one place and rules out accidentally repeating a context.
	archives := []struct {
		name    string
		fixture string
	}{
		{"Tar", "test-archive.tar"},
		{"TarGZ", "test-archive.tar.gz"},
		{"TarBz2", "test-archive.tar.bz2"},
		{"TarXZ", "test-archive.tar.xz"},
		{"Zip", "test-archive.zip"},
	}

	for _, archive := range archives {
		archive := archive // the closures below run later; give each its own copy

		context(archive.name, func() {
			it.Before(func() {
				var err error
				in, err = os.Open(filepath.Join("testdata", archive.fixture))
				Expect(err).NotTo(HaveOccurred())
			})

			it("extracts the archive", func() {
				Expect(crush.Extract(in, path, 0)).To(Succeed())
				Expect(filepath.Join(path, "fileA.txt")).To(BeARegularFile())
				Expect(filepath.Join(path, "dirA", "fileB.txt")).To(BeARegularFile())
				Expect(filepath.Join(path, "dirA", "fileC.txt")).To(BeARegularFile())
			})

			it("skips stripped components", func() {
				// Stripping one component removes dirA, so its files land directly in path.
				Expect(crush.Extract(in, path, 1)).To(Succeed())
				Expect(filepath.Join(path, "fileB.txt")).To(BeARegularFile())
				Expect(filepath.Join(path, "fileC.txt")).To(BeARegularFile())
			})
		})
	}

	context("compression only", func() {
		// Compressed fixtures that do not wrap an archive: Extract should write the decompressed
		// content to the given file path instead of treating destination as a directory.
		compressed := []struct {
			name    string
			fixture string
		}{
			{"gzip", "test-compress.gz"},
			{"xz", "test-compress.xz"},
			{"bz2", "test-compress.bz2"},
		}

		for _, c := range compressed {
			c := c // the closure below runs later; give each its own copy

			it("decompresses "+c.name, func() {
				var err error
				in, err = os.Open(filepath.Join("testdata", c.fixture))
				Expect(err).NotTo(HaveOccurred())

				Expect(crush.Extract(in, filepath.Join(path, "test-compress"), 0)).To(Succeed())
				Expect(filepath.Join(path, "test-compress")).To(BeARegularFile())
			})
		}
	})
})
})
}
Binary file added crush/testdata/test-compress.bz2
Binary file not shown.
Binary file added crush/testdata/test-compress.gz
Binary file not shown.
Binary file added crush/testdata/test-compress.xz
Binary file not shown.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/Masterminds/semver/v3 v3.1.1
github.com/buildpacks/libcnb v1.25.5
github.com/creack/pty v1.1.17
github.com/h2non/filetype v1.1.3
github.com/heroku/color v0.0.6
github.com/imdario/mergo v0.3.12
github.com/mitchellh/hashstructure/v2 v2.0.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg=
github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY=
github.com/heroku/color v0.0.6 h1:UTFFMrmMLFcL3OweqP1lAdp8i1y/9oHqkeHjQ/b/Ny0=
github.com/heroku/color v0.0.6/go.mod h1:ZBvOcx7cTF2QKOv4LbmoBtNl5uB17qWxGuzZrsi1wLU=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
Expand Down

0 comments on commit c4a5099

Please sign in to comment.