Skip to content

Commit

Permalink
Bug fixes for extractors.
Browse files Browse the repository at this point in the history
  • Loading branch information
bengarrett committed Feb 5, 2024
1 parent 33c8e30 commit a890b70
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 68 deletions.
104 changes: 43 additions & 61 deletions internal/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ var (
ErrUnknownExt = errors.New("the archive uses an unsupported file extension")
)

// ArjItem returns true if the string is a row from an ARJ list.
// ARJItem returns true if the string is a row from the [arj program] list command.
//
// [arj program]: https://arj.sourceforge.net/
func ARJItem(s string) bool {
const minLen = 6
if len(s) < minLen {
Expand All @@ -61,14 +63,16 @@ func ARJItem(s string) bool {
return true
}

// CheckyCharset checks the byte slice for valid UTF-8 encoding.
// If the byte slice is not valid UTF-8, it will attempt to decode
// the byte slice using the MS-DOS era, IBM CP-437 character set.
// CheckyPath checks the byte slice for valid UTF-8 encoding.
// If the byte slice is not valid, it will attempt to decode
// the byte slice using the MS-DOS era, [charmap.CodePage437] character set.
//
// This is a historical oddity with BBS file archives, where the
// file names were encoded using elite-speek and other untypable
// This is for historical oddities with BBS file archives, where the
// item names were encoded using [leetspeak] and other untypable
// characters.
func CheckyCharset(b []byte) string {
//
// [leetspeak]: https://www.oed.com/dictionary/leetspeak_n
func CheckyPath(b []byte) string {
if utf8.Valid(b) {
return string(b)
}
Expand All @@ -80,14 +84,10 @@ func CheckyCharset(b []byte) string {
return string(result)
}

// Content returns both a list of files within an rar, tar, or zip archive;
// as-well as a suitable filename string for the archive. This filename is
// useful when the original archive filename has been given an invalid file
// extension.
//
// An absolute path is required by src that points to the archive file named as a unique id.
// Content returns a list of files within a rar, tar, lha, or zip archive.
// The extension of filename is used to determine the archive format.
//
// The original archive filename with extension is required to determine text compression format.
// An absolute path is required by src that points to the source archive file.
func Content(src, filename string) ([]string, error) {
st, err := os.Stat(src)
if errors.Is(err, fs.ErrNotExist) {
Expand Down Expand Up @@ -122,7 +122,7 @@ func walker(src, filename string) ([]string, error) {
if strings.TrimSpace(f.Name()) == "" {
return nil
}
name := CheckyCharset([]byte(f.Name()))
name := CheckyPath([]byte(f.Name()))
files = append(files, name)
return nil
})
Expand All @@ -146,67 +146,49 @@ func commander(src, filename string) ([]string, error) {
return files, nil
}

// Extract the targets file from src archive to the destination folder.
// The archive format is selected implicitly.
//
// Archiver relies on the filename extension to determine which
// decompression format to use, which must be supplied using filename.
func Extract(src, dst, filename, targets string) error {
// Extract extracts the named targets from the source archive file to the destination folder.
// If no targets are provided, all files are extracted.
// The extension of filename is used to determine the archive format.
func Extract(src, dst, filename string, targets ...string) error {
name := strings.ToLower(filename)
f, err := archiver.ByExtension(name)
if err != nil {
return fmt.Errorf("extract %q: %w", name, err)
}
format, ok := f.(archiver.Extractor)
if !ok {
return fmt.Errorf("extract %s (%T): %w", name, f, ErrArchive)
return extractor(src, dst, filename, targets...)
}
// recover from panic caused by mholt/archiver.
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("extract panic %s: %v", name, r)
}
}()
// err := format.Extract(ctx, input, fileList, handler)
if err := format.Extract(src, targets, dst); err != nil {
// second attempt at extraction using a system archiver program
x := Extractor{Source: src, Destination: dst, OriginalName: filename}
if err := x.Extract(targets); err != nil {
return fmt.Errorf("command extract: %w", err)
extractAll := len(targets) == 0
if extractAll {
all, ok := f.(archiver.Unarchiver)
if !ok {
return fmt.Errorf("extract all %s (%T): %w", filename, f, ErrArchive)
}
if err = all.Unarchive(src, dst); err == nil {
return nil
}
} else {
target, ok := f.(archiver.Extractor)
if !ok {
return fmt.Errorf("extract %s (%T): %w", name, f, ErrArchive)
}
t := strings.Join(targets, " ")
if err = target.Extract(src, t, dst); err == nil {
return nil
}
return fmt.Errorf("extract: %w", err)
}
return nil
return extractor(src, dst, filename, targets...)
}

// ExtractAll decompresses the given archive file into the destination folder.
// The archive format is selected implicitly.
//
// Archiver relies on the filename extension to determine which
// decompression format to use, which must be supplied using filename.
func ExtractAll(src, dst, filename string) error {
name := strings.ToLower(filename)
f, err := archiver.ByExtension(name)
// extractor makes a second attempt at extraction using a system archiver program.
func extractor(src, dst, filename string, targets ...string) error {
x := Extractor{Source: src, Destination: dst, OriginalName: filename}
err := x.Extract(targets...)
if err != nil {
return fmt.Errorf("%s: %w", filename, err)
}
format, ok := f.(archiver.Unarchiver)
if !ok {
return fmt.Errorf("extract all %s (%T): %w", filename, f, ErrArchive)
}
// recover from panic caused by mholt/archiver.
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("extract all panic %s: %v", name, r)
}
}()
if err := format.Unarchive(src, dst); err != nil {
// second attempt at extraction using a system archiver program
x := Extractor{Source: src, Destination: dst, OriginalName: filename}
if err := x.Extract(); err != nil {
return fmt.Errorf("command extract all: %w", err)
}
return fmt.Errorf("extract all: %w", err)
return fmt.Errorf("command extract: %w", err)
}
return nil
}
Expand Down
26 changes: 19 additions & 7 deletions internal/archive/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package archive_test
//
// go test -timeout 30s -count 5 -race github.com/Defacto2/server/internal/archive

// TODO: test all assets for extracting all files, using an array of file names.

import (
"os"
"path/filepath"
Expand Down Expand Up @@ -102,28 +104,38 @@ func TestContent(t *testing.T) {

func TestExtractAll(t *testing.T) {
t.Parallel()
err := archive.ExtractAll("", "", "")
err := archive.Extract("", "", "")
assert.Error(t, err)

err = archive.ExtractAll(td(""), "", "")
err = archive.Extract(td(""), "", "")
assert.Error(t, err)

err = archive.ExtractAll(td(""), os.TempDir(), "")
err = archive.Extract(td(""), os.TempDir(), "")
assert.Error(t, err)

err = archive.ExtractAll(td("PKZ204EX.ZIP"), os.TempDir(), "")
err = archive.Extract(td("PKZ204EX.ZIP"), os.TempDir(), "")
assert.Error(t, err)

err = archive.ExtractAll(td("PKZ204EX.ZIP"), os.TempDir(), "test.exe")
err = archive.Extract(td("PKZ204EX.ZIP"), os.TempDir(), "test.exe")
assert.Error(t, err)

tmp, err := os.MkdirTemp("", "testextractall-")
assert.NoError(t, err)
defer os.RemoveAll(tmp)

err = archive.ExtractAll(td("PKZ204EX.ZIP"), tmp, "PKZ204EX.ZIP")
err = archive.Extract(td("PKZ204EX.ZIP"), tmp, "PKZ204EX.ZIP")
assert.NoError(t, err)

defer os.RemoveAll(tmp)
tmp1, err := os.MkdirTemp("", "testextractall1-")
assert.NoError(t, err)
defer os.RemoveAll(tmp1)

name := "ARJ310.ARJ"
err = archive.Extract(td(name), tmp1, name)
assert.NoError(t, err)
count, err := helper.Count(tmp1)
assert.NoError(t, err)
assert.Equal(t, 15, count)
}

func TestExtract(t *testing.T) {
Expand Down

0 comments on commit a890b70

Please sign in to comment.