Skip to content

Commit

Permalink
Fix race conditions with archive package.
Browse files Browse the repository at this point in the history
  • Loading branch information
bengarrett committed Feb 5, 2024
1 parent 27469c4 commit 33c8e30
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 82 deletions.
4 changes: 2 additions & 2 deletions handler/app/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -556,9 +556,9 @@ func (got *DemozooLink) Stat(c echo.Context, downloadDir string) error {

func (got *DemozooLink) ArchiveContent(c echo.Context, path string) error {
//return c.JSON(http.StatusOK, got)
files, _, err := archive.Content(path, got.Filename)
files, err := archive.Content(path, got.Filename)
if err != nil {
return c.JSON(http.StatusOK, got) // todo, handle unsupported archive types else return
return c.JSON(http.StatusOK, got)
}
got.Content = strings.Join(files, "\n")
return c.JSON(http.StatusOK, got)
Expand Down
92 changes: 41 additions & 51 deletions internal/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const (
// 7z,arc,ark,arj,cab,gz,lha,lzh,rar,tar,tar.gz,zip.
arjx = ".arj" // Archived by Robert Jung
lhax = ".lha" // LHarc by Haruyasu Yoshizaki (Yoshi)
lhzx = ".lzh" // LHArc by Haruyasu Yoshizaki (Yoshi)
rarx = ".rar" // Roshal ARchive by Alexander Roshal
zipx = ".zip" // Phil Katz's ZIP for MSDOS systems
)
Expand Down Expand Up @@ -63,6 +64,10 @@ func ARJItem(s string) bool {
// CheckyCharset checks the byte slice for valid UTF-8 encoding.
// If the byte slice is not valid UTF-8, it will attempt to decode
// the byte slice using the MS-DOS era, IBM CP-437 character set.
//
// This is a historical oddity with BBS file archives, where the
// file names were encoded using elite-speek and other untypable
// characters.
func CheckyCharset(b []byte) string {
if utf8.Valid(b) {
return string(b)
Expand All @@ -83,59 +88,27 @@ func CheckyCharset(b []byte) string {
// An absolute path is required by src that points to the archive file named as a unique id.
//
// The original archive filename with extension is required to determine text compression format.
func Content(src, filename string) ([]string, string, error) {
func Content(src, filename string) ([]string, error) {
st, err := os.Stat(src)
if errors.Is(err, fs.ErrNotExist) {
return nil, "", fmt.Errorf("read %s: %w", filepath.Base(src), ErrFile)
return nil, fmt.Errorf("read %s: %w", filepath.Base(src), ErrFile)
}
if st.IsDir() {
return nil, "", fmt.Errorf("read %s: %w", filepath.Base(src), ErrDir)
}
files, fname, err := Readr(src, filename)
if err != nil {
return nil, "", fmt.Errorf("read uuid/filename: %w", err)
}
return files, fname, nil
}

// Readr returns both a list of files within an rar, tar or zip archive,
// and a suitable archive filename string.
// If there are problems reading the archive due to an incorrect filename
// extension, the returned filename string will be corrected.
func Readr(src, filename string) ([]string, string, error) {
files, err := readr(src, filename)
if err != nil {
fmt.Println("readr error:", err)
return readCommand(src, filename)
}
return files, filename, nil
}

func readCommand(src, filename string) ([]string, string, error) {
files, ext, err := Readr(src, filename)
if errors.Is(err, ErrWrongExt) {
newname := Rename(ext, filename)
files, err = readr(src, newname)
if err != nil {
return nil, "", fmt.Errorf("readr fix: %w", err)
}
return files, newname, nil
return nil, fmt.Errorf("read %s: %w", filepath.Base(src), ErrDir)
}
files, err := walker(src, filename)
if err != nil {
return nil, "", fmt.Errorf("readr: %w", err)
return commander(src, filename)
}
// remove empty entries
files = slices.DeleteFunc(files, func(s string) bool {
return strings.TrimSpace(s) == ""
})
return files, filename, nil
return files, nil
}

func readr(src, filename string) ([]string, error) {
// walker uses the mholt/archiver package to walk the src archive file.
func walker(src, filename string) ([]string, error) {
name := strings.ToLower(filename) // ByExtension is case sensitive
format, err := archiver.ByExtension(name)
if err != nil {
return nil, fmt.Errorf("by extension: %w", err)
return nil, err
}
w, ok := format.(archiver.Walker)
if !ok {
Expand All @@ -159,6 +132,20 @@ func readr(src, filename string) ([]string, error) {
return files, err
}

// commander uses system archiver and decompression programs to read the src archive file.
func commander(src, filename string) ([]string, error) {
c := Contents{}
if err := c.Read(src, filename); err != nil {
return nil, fmt.Errorf("commander failed with %s (%q): %w", filename, c.Ext, err)
}
// remove empty entries
files := c.Files
files = slices.DeleteFunc(files, func(s string) bool {
return strings.TrimSpace(s) == ""
})
return files, nil
}

// Extract the targets file from src archive to the destination folder.
// The archive format is selected implicitly.
//
Expand Down Expand Up @@ -253,21 +240,24 @@ func MagicExt(src string) (string, error) {
}
s := strings.Split(strings.ToLower(string(out)), ",")
magic := strings.TrimSpace(s[0])
if MagicLHA(magic) {
return lhax, nil
}
for magic, ext := range magics {
if strings.TrimSpace(s[0]) == magic {
return ext, nil
}
}
if MagicLHA(magic) {
return lhax, nil
}
return "", fmt.Errorf("%w: %q", ErrMagic, magic)
}

// MagicLHA returns true if the LHA file type is matched in the magic string.
func MagicLHA(magic string) bool {
s := strings.Split(magic, " ")
const lha = "lha"
const lha, lharc = "lha", "lharc"
if s[0] == lharc {
return true
}
if s[0] != lha {
return false
}
Expand Down Expand Up @@ -313,14 +303,14 @@ func (c *Contents) Read(src, filename string) error {
if err != nil {
return fmt.Errorf("system reader: %w", err)
}
if !strings.EqualFold(ext, filepath.Ext(filename)) {
// retry using correct filename extension
return fmt.Errorf("system reader: %w", ErrWrongExt)
}
// if !strings.EqualFold(ext, filepath.Ext(filename)) {
// // retry using correct filename extension
// return fmt.Errorf("system reader: %w", ErrWrongExt)
// }
switch strings.ToLower(ext) {
case arjx:
return c.ARJ(src)
case lhax:
case lhax, lhzx:
return c.LHA(src)
case rarx:
return c.Rar(src)
Expand Down Expand Up @@ -497,7 +487,7 @@ func (x Extractor) Extract(targets ...string) error {
switch ext {
case arjx:
return x.ARJ(targets...)
case lhax:
case lhax, lhzx:
return x.LHA(targets...)
case zipx:
return x.Zip(targets...)
Expand Down
57 changes: 28 additions & 29 deletions internal/archive/archive_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package archive_test

// It is highly recommended to run these tests with -race flag to detect
// race conditions.
//
// go test -timeout 30s -count 5 -race github.com/Defacto2/server/internal/archive

import (
"os"
"path/filepath"
Expand All @@ -24,81 +29,75 @@ func td(name string) string {

func TestContent(t *testing.T) {
t.Parallel()
files, name, err := archive.Content("", "")
files, err := archive.Content("", "")
assert.Error(t, err)
assert.Empty(t, files)
assert.Empty(t, name)

files, name, err = archive.Content(td(""), "")
files, err = archive.Content(td(""), "")
assert.Error(t, err)
assert.Empty(t, files)
assert.Empty(t, name)

// test a deflated zip file
files, name, err = archive.Content(td("PKZ204EX.ZIP"), "PKZ204EX.ZIP")
finename := "PKZ204EX.ZIP"
files, err = archive.Content(td(finename), finename)
assert.Nil(t, err)
assert.Len(t, files, 15)
assert.Equal(t, "PKZ204EX.ZIP", name)

// test the tar handler
files, name, err = archive.Content(td("TAR135.TAR"), "TAR135.TAR")
finename = "TAR135.TAR"
files, err = archive.Content(td(finename), finename)
assert.Nil(t, err)
assert.Len(t, files, 15)
assert.Equal(t, "TAR135.TAR", name)

// test the rar handler
files, name, err = archive.Content(td("RAR624.RAR"), "RAR624.RAR")
finename = "RAR624.RAR"
files, err = archive.Content(td(finename), finename)
assert.Nil(t, err)
assert.Len(t, files, 15)
assert.Equal(t, "RAR624.RAR", name)

// test the tar.gz handler
files, name, err = archive.Content(td("TAR135.GZ"), "TAR135.TAR.GZ")
finename = "TAR135.TAR.GZ"
files, err = archive.Content(td("TAR135.GZ"), finename)
assert.Nil(t, err)
assert.Len(t, files, 15)
assert.Equal(t, "TAR135.TAR.GZ", name)

// test an arj file
files, name, err = archive.Content(td("ARJ310.ARJ"), "ARJ310.ARJ")
files, err = archive.Content(td("ARJ310.ARJ"), "ARJ310.ARJ")
assert.Nil(t, err)
assert.Len(t, files, 15)
assert.Equal(t, "ARJ310.ARJ", name)

// test an unsupported arc file
files, name, err = archive.Content(td("ARC521P.ARC"), "ARC521P.ARC")
files, err = archive.Content(td("ARC521P.ARC"), "ARC521P.ARC")
assert.Error(t, err)
assert.Empty(t, files)
assert.Empty(t, name)

// test a legacy shrunk archive
files, name, err = archive.Content(td("PKZ80A1.ZIP"), "PKZ80A1.ZIP")
finename = "PKZ80A1.ZIP"
files, err = archive.Content(td(finename), finename)
assert.Nil(t, err)
assert.Len(t, files, 15)
assert.Equal(t, "PKZ80A1.ZIP", name)

// test an unsupported 7z file
files, name, err = archive.Content(td("TEST.7z"), "TEST.7z")
files, err = archive.Content(td("TEST.7z"), "TEST.7z")
assert.Error(t, err)
assert.Empty(t, files)
assert.Empty(t, name)

// test a xz archive
files, name, err = archive.Content(td("TEST.tar.xz"), "TEST.tar.xz")
finename = "TEST.tar.xz"
files, err = archive.Content(td(finename), finename)
assert.Nil(t, err)
assert.Len(t, files, 15)
assert.Equal(t, "TEST.tar.xz", name)

// test an unsupported lha archive
files, name, err = archive.Content(td("LHA114.LZH"), "LHA114.LZH")
assert.Error(t, err)
assert.Empty(t, files)
assert.Empty(t, name)
finename = "LHA114.LZH"
files, err = archive.Content(td(finename), finename)
assert.Nil(t, err)
assert.Len(t, files, 15)

// test non-latin text
files, name, err = archive.Content(td("τεχτƒιℓε.zip"), "τεχτƒιℓε.zip")
finename = "τεχτƒιℓε.zip"
files, err = archive.Content(td(finename), finename)
assert.Nil(t, err)
assert.Len(t, files, 1)
assert.Equal(t, "τεχτƒιℓε.zip", name)
}

func TestExtractAll(t *testing.T) {
Expand Down

0 comments on commit 33c8e30

Please sign in to comment.