diff --git a/handler/app/context.go b/handler/app/context.go index 2331b40c..274c4d95 100644 --- a/handler/app/context.go +++ b/handler/app/context.go @@ -556,9 +556,9 @@ func (got *DemozooLink) Stat(c echo.Context, downloadDir string) error { func (got *DemozooLink) ArchiveContent(c echo.Context, path string) error { //return c.JSON(http.StatusOK, got) - files, _, err := archive.Content(path, got.Filename) + files, err := archive.Content(path, got.Filename) if err != nil { - return c.JSON(http.StatusOK, got) // todo, handle unsupported archive types else return + return c.JSON(http.StatusOK, got) } got.Content = strings.Join(files, "\n") return c.JSON(http.StatusOK, got) diff --git a/internal/archive/archive.go b/internal/archive/archive.go index 257bb280..c33ee12e 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -25,6 +25,7 @@ const ( // 7z,arc,ark,arj,cab,gz,lha,lzh,rar,tar,tar.gz,zip. arjx = ".arj" // Archived by Robert Jung lhax = ".lha" // LHarc by Haruyasu Yoshizaki (Yoshi) + lhzx = ".lzh" // LHArc by Haruyasu Yoshizaki (Yoshi) rarx = ".rar" // Roshal ARchive by Alexander Roshal zipx = ".zip" // Phil Katz's ZIP for MSDOS systems ) @@ -63,6 +64,10 @@ func ARJItem(s string) bool { // CheckyCharset checks the byte slice for valid UTF-8 encoding. // If the byte slice is not valid UTF-8, it will attempt to decode // the byte slice using the MS-DOS era, IBM CP-437 character set. +// +// This is a historical oddity with BBS file archives, where the +// file names were encoded using elite-speek and other untypable +// characters. func CheckyCharset(b []byte) string { if utf8.Valid(b) { return string(b) @@ -83,59 +88,27 @@ func CheckyCharset(b []byte) string { // An absolute path is required by src that points to the archive file named as a unique id. // // The original archive filename with extension is required to determine text compression format. -func Content(src, filename string) ([]string, string, error) { +func Content(src, filename string) ([]string, error) { st, err := os.Stat(src) if errors.Is(err, fs.ErrNotExist) { - return nil, "", fmt.Errorf("read %s: %w", filepath.Base(src), ErrFile) + return nil, fmt.Errorf("read %s: %w", filepath.Base(src), ErrFile) } if st.IsDir() { - return nil, "", fmt.Errorf("read %s: %w", filepath.Base(src), ErrDir) - } - files, fname, err := Readr(src, filename) - if err != nil { - return nil, "", fmt.Errorf("read uuid/filename: %w", err) - } - return files, fname, nil -} - -// Readr returns both a list of files within an rar, tar or zip archive, -// and a suitable archive filename string. -// If there are problems reading the archive due to an incorrect filename -// extension, the returned filename string will be corrected. -func Readr(src, filename string) ([]string, string, error) { - files, err := readr(src, filename) - if err != nil { - fmt.Println("readr error:", err) - return readCommand(src, filename) - } - return files, filename, nil -} - -func readCommand(src, filename string) ([]string, string, error) { - files, ext, err := Readr(src, filename) - if errors.Is(err, ErrWrongExt) { - newname := Rename(ext, filename) - files, err = readr(src, newname) - if err != nil { - return nil, "", fmt.Errorf("readr fix: %w", err) - } - return files, newname, nil + return nil, fmt.Errorf("read %s: %w", filepath.Base(src), ErrDir) } + files, err := walker(src, filename) if err != nil { - return nil, "", fmt.Errorf("readr: %w", err) + return commander(src, filename) } - // remove empty entries - files = slices.DeleteFunc(files, func(s string) bool { - return strings.TrimSpace(s) == "" - }) - return files, filename, nil + return files, nil } -func readr(src, filename string) ([]string, error) { +// walker uses the mholt/archiver package to walk the src archive file. +func walker(src, filename string) ([]string, error) { name := strings.ToLower(filename) // ByExtension is case sensitive format, err := archiver.ByExtension(name) if err != nil { - return nil, fmt.Errorf("by extension: %w", err) + return nil, err } w, ok := format.(archiver.Walker) if !ok { @@ -159,6 +132,20 @@ func readr(src, filename string) ([]string, error) { return files, err } +// commander uses system archiver and decompression programs to read the src archive file. +func commander(src, filename string) ([]string, error) { + c := Contents{} + if err := c.Read(src, filename); err != nil { + return nil, fmt.Errorf("commander failed with %s (%q): %w", filename, c.Ext, err) + } + // remove empty entries + files := c.Files + files = slices.DeleteFunc(files, func(s string) bool { + return strings.TrimSpace(s) == "" + }) + return files, nil +} + // Extract the targets file from src archive to the destination folder. // The archive format is selected implicitly. // @@ -253,21 +240,24 @@ func MagicExt(src string) (string, error) { } s := strings.Split(strings.ToLower(string(out)), ",") magic := strings.TrimSpace(s[0]) + if MagicLHA(magic) { + return lhax, nil + } for magic, ext := range magics { if strings.TrimSpace(s[0]) == magic { return ext, nil } } - if MagicLHA(magic) { - return lhax, nil - } return "", fmt.Errorf("%w: %q", ErrMagic, magic) } // MagicLHA returns true if the LHA file type is matched in the magic string. func MagicLHA(magic string) bool { s := strings.Split(magic, " ") - const lha = "lha" + const lha, lharc = "lha", "lharc" + if s[0] == lharc { + return true + } if s[0] != lha { return false } @@ -313,14 +303,14 @@ func (c *Contents) Read(src, filename string) error { if err != nil { return fmt.Errorf("system reader: %w", err) } - if !strings.EqualFold(ext, filepath.Ext(filename)) { - // retry using correct filename extension - return fmt.Errorf("system reader: %w", ErrWrongExt) - } + // if !strings.EqualFold(ext, filepath.Ext(filename)) { + // // retry using correct filename extension + // return fmt.Errorf("system reader: %w", ErrWrongExt) + // } switch strings.ToLower(ext) { case arjx: return c.ARJ(src) - case lhax: + case lhax, lhzx: return c.LHA(src) case rarx: return c.Rar(src) @@ -497,7 +487,7 @@ func (x Extractor) Extract(targets ...string) error { switch ext { case arjx: return x.ARJ(targets...) - case lhax: + case lhax, lhzx: return x.LHA(targets...) case zipx: return x.Zip(targets...) diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go index d3c6d1fb..27bd339a 100644 --- a/internal/archive/archive_test.go +++ b/internal/archive/archive_test.go @@ -1,5 +1,10 @@ package archive_test +// It is highly recommended to run these tests with -race flag to detect +// race conditions. +// +// go test -timeout 30s -count 5 -race github.com/Defacto2/server/internal/archive + import ( "os" "path/filepath" @@ -24,81 +29,75 @@ func td(name string) string { func TestContent(t *testing.T) { t.Parallel() - files, name, err := archive.Content("", "") + files, err := archive.Content("", "") assert.Error(t, err) assert.Empty(t, files) - assert.Empty(t, name) - files, name, err = archive.Content(td(""), "") + files, err = archive.Content(td(""), "") assert.Error(t, err) assert.Empty(t, files) - assert.Empty(t, name) // test a deflated zip file - files, name, err = archive.Content(td("PKZ204EX.ZIP"), "PKZ204EX.ZIP") + finename := "PKZ204EX.ZIP" + files, err = archive.Content(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) - assert.Equal(t, "PKZ204EX.ZIP", name) // test the tar handler - files, name, err = archive.Content(td("TAR135.TAR"), "TAR135.TAR") + finename = "TAR135.TAR" + files, err = archive.Content(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) - assert.Equal(t, "TAR135.TAR", name) // test the rar handler - files, name, err = archive.Content(td("RAR624.RAR"), "RAR624.RAR") + finename = "RAR624.RAR" + files, err = archive.Content(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) - assert.Equal(t, "RAR624.RAR", name) // test the tar.gz handler - files, name, err = archive.Content(td("TAR135.GZ"), "TAR135.TAR.GZ") + finename = "TAR135.TAR.GZ" + files, err = archive.Content(td("TAR135.GZ"), finename) assert.Nil(t, err) assert.Len(t, files, 15) - assert.Equal(t, "TAR135.TAR.GZ", name) // test an arj file - files, name, err = archive.Content(td("ARJ310.ARJ"), "ARJ310.ARJ") + files, err = archive.Content(td("ARJ310.ARJ"), "ARJ310.ARJ") assert.Nil(t, err) assert.Len(t, files, 15) - assert.Equal(t, "ARJ310.ARJ", name) // test an unsupported arc file - files, name, err = archive.Content(td("ARC521P.ARC"), "ARC521P.ARC") + files, err = archive.Content(td("ARC521P.ARC"), "ARC521P.ARC") assert.Error(t, err) assert.Empty(t, files) - assert.Empty(t, name) // test a legacy shrunk archive - files, name, err = archive.Content(td("PKZ80A1.ZIP"), "PKZ80A1.ZIP") + finename = "PKZ80A1.ZIP" + files, err = archive.Content(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) - assert.Equal(t, "PKZ80A1.ZIP", name) - // test an unsupported 7z file - files, name, err = archive.Content(td("TEST.7z"), "TEST.7z") + files, err = archive.Content(td("TEST.7z"), "TEST.7z") assert.Error(t, err) assert.Empty(t, files) - assert.Empty(t, name) // test a xz archive - files, name, err = archive.Content(td("TEST.tar.xz"), "TEST.tar.xz") + finename = "TEST.tar.xz" + files, err = archive.Content(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) - assert.Equal(t, "TEST.tar.xz", name) // test an unsupported lha archive - files, name, err = archive.Content(td("LHA114.LZH"), "LHA114.LZH") - assert.Error(t, err) - assert.Empty(t, files) - assert.Empty(t, name) + finename = "LHA114.LZH" + files, err = archive.Content(td(finename), finename) + assert.Nil(t, err) + assert.Len(t, files, 15) // test non-latin text - files, name, err = archive.Content(td("τεχτƒιℓε.zip"), "τεχτƒιℓε.zip") + finename = "τεχτƒιℓε.zip" + files, err = archive.Content(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 1) - assert.Equal(t, "τεχτƒιℓε.zip", name) } func TestExtractAll(t *testing.T) {