From 5928e4f4ec9ff65d927292eddbbc36048bffe44f Mon Sep 17 00:00:00 2001 From: Ben Garrett Date: Tue, 6 Feb 2024 14:45:39 +1100 Subject: [PATCH] Add support for external links in DemozooLink struct --- handler/app/context.go | 62 +++-------- internal/archive/archive.go | 147 +++++++++++--------------- internal/archive/archive_test.go | 34 +++--- internal/archive/find.go | 107 +++++++++++++++++++ internal/archive/find_test.go | 26 +++++ internal/archive/internal/internal.go | 46 ++++++++ internal/helper/get.go | 4 + internal/helper/read.go | 23 ++++ internal/zoo/zoo.go | 102 +++++++++++++++++- internal/zoo/zoo_test.go | 53 ++++++++++ 10 files changed, 448 insertions(+), 156 deletions(-) create mode 100644 internal/archive/find.go create mode 100644 internal/archive/find_test.go create mode 100644 internal/archive/internal/internal.go diff --git a/handler/app/context.go b/handler/app/context.go index 274c4d95..21c73be0 100644 --- a/handler/app/context.go +++ b/handler/app/context.go @@ -430,6 +430,8 @@ type DemozooLink struct { FileHash string `json:"file_hash"` // Content is the file archive content. Content string `json:"content"` + // Readme is the file readme, text or NFO file. + Readme string `json:"readme"` // LinkURL is the download file link used to fetch the file. LinkURL string `json:"link_url"` // LinkClass is the download link class provided by Demozoo. @@ -438,7 +440,10 @@ type DemozooLink struct { Success bool `json:"success"` // Error is the error message if the download or record update failed. Error string `json:"error"` - // todo add more fields + // The following data points are obtained from the ExternalLinks fields. + Github string `json:"github_repo"` + Pouet int `json:"pouet_prod"` + YouTube string `json:"youtube_video"` } // GetDemozooLink fetches the multiple download_links values from the @@ -460,6 +465,7 @@ func GetDemozooLink(z *zap.SugaredLogger, c echo.Context, downloadDir string) er FileType: "", FileHash: "", Content: "", + Readme: "", LinkURL: "", LinkClass: "", Success: false, @@ -519,6 +525,10 @@ func (got *DemozooLink) Download(c echo.Context, downloadDir string) error { got.LinkClass = link.LinkClass got.Success = true got.Error = "" + // obtain data from the external links + got.Github = rec.GithubRepo() + got.Pouet = rec.PouetProd() + got.YouTube = rec.YouTubeVideo() return got.Stat(c, downloadDir) } got.Error = "no usable download links found, they returned 404 or were empty" @@ -555,61 +565,15 @@ func (got *DemozooLink) Stat(c echo.Context, downloadDir string) error { } func (got *DemozooLink) ArchiveContent(c echo.Context, path string) error { - //return c.JSON(http.StatusOK, got) - files, err := archive.Content(path, got.Filename) + files, err := archive.List(path, got.Filename) if err != nil { return c.JSON(http.StatusOK, got) } + got.Readme = archive.Readme(got.Filename, files...) 
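An aside on the new DemozooLink members: Readme, Github, Pouet and YouTube are plain JSON-tagged fields, so the handler's response payload gains the keys readme, github_repo, pouet_prod and youtube_video. A minimal sketch of that serialization, assuming the handler/app import path and using only field names shown in this patch with illustrative values:

    package main

    import (
        "encoding/json"
        "fmt"
        "log"

        "github.com/Defacto2/server/handler/app"
    )

    func main() {
        // Illustrative values only, not data from a real Demozoo record.
        got := app.DemozooLink{
            Readme:  "FILE_ID.DIZ",
            Github:  "/Defacto2/server",
            Pouet:   71562,
            YouTube: "x6QrKsBOERA",
        }
        b, err := json.MarshalIndent(got, "", "  ")
        if err != nil {
            log.Fatal(err)
        }
        // The output includes the new "readme", "github_repo", "pouet_prod"
        // and "youtube_video" keys alongside the existing fields.
        fmt.Println(string(b))
    }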
got.Content = strings.Join(files, "\n") return c.JSON(http.StatusOK, got) } -/* - "external_links": [ - { - "link_class": "GithubRepo", - "url": "https://github.com/KoltesDigital/Those-Who-Leave" - }, - { - "link_class": "PouetProduction", - "url": "https://www.pouet.net/prod.php?which=71562" - }, - { - "link_class": "YoutubeVideo", - "url": "https://www.youtube.com/watch?v=x6QrKsBOERA" - } - "external_links": [ - { - "link_class": "PouetProduction", - "url": "https://www.pouet.net/prod.php?which=95362" - }, - { - "link_class": "YoutubeVideo", - "url": "https://www.youtube.com/watch?v=mbmNU5QVM8A" - } - ], - - Broken URL: http://scene.org/file.php?id=299790 - SKIP URL: https://files.scene.org/view/parties/2013/evoke13/demo/traction_brainstorm_muoto.avi - - "download_links": [ - { - "link_class": "SceneOrgFile", - "url": "https://files.scene.org/view/parties/2013/evoke13/demo/traction_brainstorm_muoto.zip" - }, - { - "link_class": "UntergrundFile", - "url": "https://ftp.untergrund.net/users/brainstorm/Production/traction_brainstorm_muoto.zip" - } - ], -*/ - -/* - web_id_pouet, web_id_youtube, web_id_github, web_id_16colors ? - https://demozoo.org/api/v1/productions/1/ - retrotxt_readme -*/ - // GoogleCallback is the handler for the Google OAuth2 callback page to verify // the [Google ID token]. // diff --git a/internal/archive/archive.go b/internal/archive/archive.go index fa3f185d..1d436d63 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -1,3 +1,21 @@ +// Package archive provides compressed and stored archive file extraction and content listing. +// +// The file archive formats supported are ARJ, LHA, LZH, RAR, TAR, and ZIP. +// +// The package uses the [mholt/archiver/v3] package and the following Linux programs +// as a fallback for legacy file support. +// +// 1. [arj] - open-source ARJ v3.10 +// 2. [lha] - Lhasa v0.4 LHA tool found in the jlha-utils or lhasa packages +// 3. [unrar] - 6.24 freeware by Alexander Roshal, not the common [unrar-free] which is feature incomplete +// 4. 
[zipinfo] - ZipInfo v3 by the Info-ZIP workgroup +// +// [mholt/archiver/v3]: https://github.com/mholt/archiver/tree/v3.5.1 +// [arj]: https://arj.sourceforge.net/ +// [lha]: https://fragglet.github.io/lhasa/ +// [unrar]: https://www.rarlab.com/rar_add.htm +// [unrar-free]: https://gitlab.com/bgermann/unrar-free +// [zipinfo]: https://infozip.sourceforge.net/ package archive import ( @@ -15,6 +33,7 @@ import ( "strings" "unicode/utf8" + "github.com/Defacto2/server/internal/archive/internal" "github.com/mholt/archiver/v3" "golang.org/x/text/encoding/charmap" "golang.org/x/text/transform" @@ -31,38 +50,14 @@ const ( ) var ( - ErrArchive = errors.New("format specified by source filename is not an archive format") - ErrDest = errors.New("dest directory points to a file") - ErrDir = errors.New("is a directory") - ErrFile = errors.New("no such file") - ErrMagic = errors.New("no unsupport for magic file type") - ErrReadr = errors.New("system could not read the file archive") - ErrSilent = errors.New("archiver program silently failed, it return no output or errors") - ErrProg = errors.New("archive program error") - ErrTypeOut = errors.New("magic file program result is empty") - ErrWriter = errors.New("writer must be a file object") - ErrWrongExt = errors.New("filename has the wrong file extension") - ErrUnknownExt = errors.New("the archive uses an unsupported file extension") + ErrExt = errors.New("extension is not a supported archive format") + ErrRead = errors.New("could not read the file archive") + ErrProg = errors.New("program error") + ErrFile = errors.New("path is a directory") + ErrPath = errors.New("path is a file") + ErrMissing = errors.New("path does not exist") ) -// ArjItem returns true if the string is a row from the [arj program] list command. -// -// [arj program]: https://arj.sourceforge.net/ -func ARJItem(s string) bool { - const minLen = 6 - if len(s) < minLen { - return false - } - if s[3:4] != ")" { - return false - } - x := s[:3] - if _, err := strconv.Atoi(x); err != nil { - return false - } - return true -} - // CheckyPath checks the byte slice for valid UTF-8 encoding. // If the byte slice is not valid, it will attempt to decode // the byte slice using the MS-DOS, [charmap.CodePage437] character set. @@ -85,15 +80,15 @@ func CheckyPath(b []byte) string { return string(result) } -// Content returns a list of files within an rar, tar, lha, or zip archive. +// List returns the files within an rar, tar, lha, or zip archive. // This filename extension is used to determine the archive format. -func Content(src, filename string) ([]string, error) { +func List(src, filename string) ([]string, error) { st, err := os.Stat(src) if errors.Is(err, fs.ErrNotExist) { - return nil, fmt.Errorf("read %s: %w", filepath.Base(src), ErrFile) + return nil, fmt.Errorf("read %s: %w", filepath.Base(src), ErrMissing) } if st.IsDir() { - return nil, fmt.Errorf("read %s: %w", filepath.Base(src), ErrDir) + return nil, fmt.Errorf("read %s: %w", filepath.Base(src), ErrFile) } files, err := walker(src, filename) if err != nil { @@ -111,7 +106,7 @@ func walker(src, filename string) ([]string, error) { } w, ok := format.(archiver.Walker) if !ok { - return nil, fmt.Errorf("readr %s (%T): %w", filename, format, ErrArchive) + return nil, fmt.Errorf("%w, %q", ErrExt, filename) } files := []string{} err = w.Walk(src, func(f archiver.File) error { @@ -133,7 +128,7 @@ func walker(src, filename string) ([]string, error) { // commander uses system archiver and decompression programs to read the src archive file. 
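A note on the consolidated error values: callers can branch on a small set of sentinel errors with errors.Is instead of matching message strings. A hedged sketch, assuming a caller that only needs to tell the missing-file and unsupported-format cases apart; the paths are placeholders:

    package main

    import (
        "errors"
        "log"

        "github.com/Defacto2/server/internal/archive"
    )

    func listOrSkip(src, name string) []string {
        files, err := archive.List(src, name)
        switch {
        case errors.Is(err, archive.ErrMissing):
            log.Printf("skip %s: the source path does not exist", src)
            return nil
        case errors.Is(err, archive.ErrExt):
            log.Printf("skip %s: not a supported archive format", name)
            return nil
        case err != nil:
            log.Printf("skip %s: %v", src, err)
            return nil
        }
        return files
    }

    func main() {
        _ = listOrSkip("/tmp/downloads/example.zip", "example.zip") // placeholder paths
    }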
func commander(src, filename string) ([]string, error) { - c := Contents{} + c := Content{} if err := c.Read(src, filename); err != nil { return nil, fmt.Errorf("commander failed with %s (%q): %w", filename, c.Ext, err) } @@ -164,7 +159,7 @@ func Extract(src, dst, filename string, targets ...string) error { if extractAll { all, ok := f.(archiver.Unarchiver) if !ok { - return fmt.Errorf("extract all %s (%T): %w", filename, f, ErrArchive) + return fmt.Errorf("%w, %q", ErrExt, filename) } if err = all.Unarchive(src, dst); err == nil { return nil @@ -172,7 +167,7 @@ func Extract(src, dst, filename string, targets ...string) error { } else { target, ok := f.(archiver.Extractor) if !ok { - return fmt.Errorf("extract %s (%T): %w", name, f, ErrArchive) + return fmt.Errorf("%w, %q", ErrExt, filename) } t := strings.Join(targets, " ") if err = target.Extract(src, t, dst); err == nil { @@ -184,7 +179,7 @@ func Extract(src, dst, filename string, targets ...string) error { // extractor second attempt at extraction using a system archiver program func extractor(src, dst, filename string, targets ...string) error { - x := Extractor{Source: src, Destination: dst, OriginalName: filename} + x := Extractor{Source: src, Destination: dst, Filename: filename} err := x.Extract(targets...) if err != nil { return fmt.Errorf("command extract: %w", err) @@ -212,7 +207,7 @@ func MagicExt(src string) (string, error) { return "", fmt.Errorf("magic file type: %w", err) } if len(out) == 0 { - return "", fmt.Errorf("magic file type: %w", ErrTypeOut) + return "", fmt.Errorf("magic file type: %w", ErrRead) } magics := map[string]string{ "7-zip archive data": ".7z", @@ -225,7 +220,7 @@ func MagicExt(src string) (string, error) { } s := strings.Split(strings.ToLower(string(out)), ",") magic := strings.TrimSpace(s[0]) - if MagicLHA(magic) { + if internal.MagicLHA(magic) { return lhax, nil } for magic, ext := range magics { @@ -233,34 +228,12 @@ func MagicExt(src string) (string, error) { return ext, nil } } - return "", fmt.Errorf("%w: %q", ErrMagic, magic) -} - -// MagicLHA returns true if the LHA file type is matched in the magic string. -func MagicLHA(magic string) bool { - s := strings.Split(magic, " ") - const lha, lharc = "lha", "lharc" - if s[0] == lharc { - return true - } - if s[0] != lha { - return false - } - if len(s) < len(lha) { - return false - } - if strings.Join(s[0:3], " ") == "lha archive data" { - return true - } - if strings.Join(s[2:4], " ") == "archive data" { - return true - } - return false + return "", fmt.Errorf("%w: %q", ErrExt, magic) } -// Rename the filename by replacing the file extension with the ext string. +// Replace the filename file extension with the ext string. // Leaving ext empty returns the filename without a file extension. -func Rename(ext, filename string) string { +func Replace(ext, filename string) string { const sep = "." s := strings.Split(filename, sep) if ext == "" && len(s) == 1 { @@ -276,8 +249,8 @@ func Rename(ext, filename string) string { return strings.Join(s, sep) } -// Contents are the result of using system programs to read the file archives. -type Contents struct { +// Content are the result of using system programs to read the file archives. +type Content struct { Files []string // Files returns list of files within the archive. Ext string // Ext returns file extension of the archive. } @@ -286,7 +259,7 @@ type Contents struct { // credited to Robert Jung, using the [arj program]. 
// // [arj program]: https://arj.sourceforge.net/ -func (c *Contents) ARJ(src string) error { +func (c *Content) ARJ(src string) error { prog, err := exec.LookPath("arj") if err != nil { return fmt.Errorf("arj reader: %w", err) @@ -303,13 +276,13 @@ func (c *Contents) ARJ(src string) error { return err } if len(out) == 0 { - return ErrReadr + return ErrRead } outs := strings.Split(string(out), "\n") files := []string{} const start = len("001) ") for _, s := range outs { - if !ARJItem(s) { + if !internal.ARJItem(s) { continue } files = append(files, s[start:]) @@ -324,8 +297,8 @@ func (c *Contents) ARJ(src string) error { // LHA returns the content of the src LHA or LZH archive, // credited to Haruyasu Yoshizaki (Yoshi), using the [lha program]. // -// [lha program]: http://justsolve.archiveteam.org/index.php?title=LHA -func (c *Contents) LHA(src string) error { +// [lha program]: https://fragglet.github.io/lhasa/ +func (c *Content) LHA(src string) error { prog, err := exec.LookPath("lha") if err != nil { return fmt.Errorf("lha reader: %w", err) @@ -342,7 +315,7 @@ func (c *Contents) LHA(src string) error { return err } if len(out) == 0 { - return ErrReadr + return ErrRead } outs := strings.Split(string(out), "\n") @@ -378,7 +351,7 @@ func (c *Contents) LHA(src string) error { // using the [unrar program]. // // [unrar program]: https://www.rarlab.com/rar_add.htm -func (c *Contents) Rar(src string) error { +func (c *Content) Rar(src string) error { prog, err := exec.LookPath("unrar") if err != nil { return fmt.Errorf("unrar reader: %w", err) @@ -397,7 +370,7 @@ func (c *Contents) Rar(src string) error { return fmt.Errorf("%q: %w", src, err) } if len(out) == 0 { - return ErrReadr + return ErrRead } c.Files = strings.Split(string(out), "\n") c.Files = slices.DeleteFunc(c.Files, func(s string) bool { @@ -410,7 +383,7 @@ func (c *Contents) Rar(src string) error { // Read returns the content of the src file archive using the system archiver programs. // The filename is used to determine the archive format. // Supported formats are ARJ, LHA, LZH, RAR, and ZIP. -func (c *Contents) Read(src, filename string) error { +func (c *Content) Read(src, filename string) error { ext, err := MagicExt(src) if err != nil { return fmt.Errorf("system reader: %w", err) @@ -429,14 +402,14 @@ func (c *Contents) Read(src, filename string) error { case zipx: return c.Zip(src) } - return fmt.Errorf("system reader: %w", ErrReadr) + return fmt.Errorf("system reader: %w", ErrRead) } // Zip returns the content of the src ZIP archive, credited to Phil Katz, // using the [zipinfo program]. // -// [zipinfo program]: https://www.linux.org/docs/man1/zipinfo.html -func (c *Contents) Zip(src string) error { +// [zipinfo program]: https://infozip.sourceforge.net/ +func (c *Content) Zip(src string) error { prog, err := exec.LookPath("zipinfo") if err != nil { return fmt.Errorf("zipinfo reader: %w", err) @@ -458,7 +431,7 @@ func (c *Contents) Zip(src string) error { return fmt.Errorf("%q: %w", src, err) } if len(out) == 0 { - return ErrReadr + return ErrRead } c.Files = strings.Split(string(out), "\n") c.Files = slices.DeleteFunc(c.Files, func(s string) bool { @@ -474,7 +447,7 @@ type Extractor struct { Destination string // The extraction destination directory. // The original filename of the archive, used by Extract to determine the archive format. 
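The renamed Content type keeps the same shape, a Files slice plus an Ext string, so code that wants the system-program fallback can still drive it through Read directly. A small sketch under the assumption of a hypothetical legacy LZH archive on disk:

    package main

    import (
        "fmt"
        "log"

        "github.com/Defacto2/server/internal/archive"
    )

    func main() {
        var c archive.Content
        // Read shells out to arj, lha, unrar or zipinfo based on the magic file type.
        if err := c.Read("/tmp/downloads/OLDBBS.LZH", "OLDBBS.LZH"); err != nil {
            log.Fatal(err)
        }
        fmt.Printf("listed %d files\n", len(c.Files))
        for _, name := range c.Files {
            fmt.Println(name)
        }
    }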
- OriginalName string + Filename string } // ARJ extracts the targets from the source ARJ archive @@ -487,7 +460,7 @@ func (x Extractor) ARJ(targets ...string) error { if st, err := os.Stat(dst); err != nil { return fmt.Errorf("%w: %s", err, dst) } else if !st.IsDir() { - return fmt.Errorf("%w: %s", ErrDest, dst) + return fmt.Errorf("%w: %s", ErrPath, dst) } // note: only use arj, as unarj offers limited functionality prog, err := exec.LookPath("arj") @@ -516,9 +489,9 @@ func (x Extractor) ARJ(targets ...string) error { // to the destination directory a system archive program. // If the targets are empty then all files are extracted. // -// The following archive formats are supported: ARJ, LHA, LZH, RAR, and ZIP. +// The required Filename string is used to determine the archive format. func (x Extractor) Extract(targets ...string) error { - ext := strings.ToLower(filepath.Ext(x.OriginalName)) + ext := strings.ToLower(filepath.Ext(x.Filename)) switch ext { case arjx: return x.ARJ(targets...) @@ -527,7 +500,7 @@ func (x Extractor) Extract(targets ...string) error { case zipx: return x.Zip(targets...) default: - return ErrUnknownExt + return ErrExt } } @@ -566,7 +539,7 @@ func (x Extractor) LHA(targets ...string) error { return fmt.Errorf("%s: %w", prog, err) } if len(out) == 0 { - return ErrSilent + return ErrRead } return nil } diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go index 52f52f75..b539216f 100644 --- a/internal/archive/archive_test.go +++ b/internal/archive/archive_test.go @@ -31,73 +31,73 @@ func td(name string) string { func TestContent(t *testing.T) { t.Parallel() - files, err := archive.Content("", "") + files, err := archive.List("", "") assert.Error(t, err) assert.Empty(t, files) - files, err = archive.Content(td(""), "") + files, err = archive.List(td(""), "") assert.Error(t, err) assert.Empty(t, files) // test a deflated zip file finename := "PKZ204EX.ZIP" - files, err = archive.Content(td(finename), finename) + files, err = archive.List(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) // test the tar handler finename = "TAR135.TAR" - files, err = archive.Content(td(finename), finename) + files, err = archive.List(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) // test the rar handler finename = "RAR624.RAR" - files, err = archive.Content(td(finename), finename) + files, err = archive.List(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) // test the tar.gz handler finename = "TAR135.TAR.GZ" - files, err = archive.Content(td("TAR135.GZ"), finename) + files, err = archive.List(td("TAR135.GZ"), finename) assert.Nil(t, err) assert.Len(t, files, 15) // test an arj file - files, err = archive.Content(td("ARJ310.ARJ"), "ARJ310.ARJ") + files, err = archive.List(td("ARJ310.ARJ"), "ARJ310.ARJ") assert.Nil(t, err) assert.Len(t, files, 15) // test an unsupported arc file - files, err = archive.Content(td("ARC521P.ARC"), "ARC521P.ARC") + files, err = archive.List(td("ARC521P.ARC"), "ARC521P.ARC") assert.Error(t, err) assert.Empty(t, files) // test a legacy shrunk archive finename = "PKZ80A1.ZIP" - files, err = archive.Content(td(finename), finename) + files, err = archive.List(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) // test an unsupported 7z file - files, err = archive.Content(td("TEST.7z"), "TEST.7z") + files, err = archive.List(td("TEST.7z"), "TEST.7z") assert.Error(t, err) assert.Empty(t, files) // test a xz archive finename = "TEST.tar.xz" - files, err = 
archive.Content(td(finename), finename) + files, err = archive.List(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) // test an unsupported lha archive finename = "LHA114.LZH" - files, err = archive.Content(td(finename), finename) + files, err = archive.List(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 15) // test non-latin text finename = "τεχτƒιℓε.zip" - files, err = archive.Content(td(finename), finename) + files, err = archive.List(td(finename), finename) assert.Nil(t, err) assert.Len(t, files, 1) } @@ -322,7 +322,7 @@ func TestZip(t *testing.T) { func TestBodyARJ(t *testing.T) { t.Parallel() const name = "ARJ310.ARJ" - x := archive.Contents{} + x := archive.Content{} err := x.ARJ("") assert.Error(t, err) @@ -337,7 +337,7 @@ func TestBodyARJ(t *testing.T) { func TestBodyLHA(t *testing.T) { t.Parallel() const name = "LHA114.LZH" - x := archive.Contents{} + x := archive.Content{} err := x.LHA("") assert.Error(t, err) @@ -352,7 +352,7 @@ func TestBodyLHA(t *testing.T) { func TestBodyRar(t *testing.T) { t.Parallel() const name = "RAR624.RAR" - x := archive.Contents{} + x := archive.Content{} err := x.Rar("") assert.Error(t, err) @@ -367,7 +367,7 @@ func TestBodyRar(t *testing.T) { func TestBodyZip(t *testing.T) { t.Parallel() const name = "PKZ80A4.ZIP" - x := archive.Contents{} + x := archive.Content{} err := x.Zip("") assert.Error(t, err) diff --git a/internal/archive/find.go b/internal/archive/find.go new file mode 100644 index 00000000..80d37619 --- /dev/null +++ b/internal/archive/find.go @@ -0,0 +1,107 @@ +package archive + +import ( + "path/filepath" + "sort" + "strings" +) + +// Finds are a collection of matched filenames and their usability ranking. +type Finds map[string]Usability + +// BestMatch returns the most usable filename from a collection of finds. +func (f Finds) BestMatch() string { + if len(f) == 0 { + return "" + } + type fp struct { + Filename string + Usability Usability + } + ss := make([]fp, len(f)) + i := 0 + for k, v := range f { + ss[i] = fp{k, v} + i++ + } + sort.SliceStable(ss, func(i, j int) bool { + return ss[i].Usability < ss[j].Usability // '<' equals assending order + }) + for _, kv := range ss { + return kv.Filename // return first result + } + return "" +} + +const ( + diz = ".diz" + nfo = ".nfo" + txt = ".txt" +) + +// Readme returns the best matching scene text README or NFO file from a collection of files. +// The filename is the name of the archive file, and the files are the list of files in the archive. +// Note the filename matches are case-insensitive as many handled file archives are +// created on Windows FAT32, NTFS or MS-DOS FAT16 file systems. 
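A hedged illustration of the ranking that Readme applies, using made-up filenames; the expected winner follows from the priority comments in the matching code that follows:

    package main

    import (
        "fmt"

        "github.com/Defacto2/server/internal/archive"
    )

    func main() {
        // A hypothetical archive listing; only the text-like extensions are considered.
        files := []string{"SETUP.EXE", "FILE_ID.DIZ", "GROUP.NFO", "RELEASE.NFO"}
        best := archive.Readme("RELEASE.ZIP", files...)
        // "RELEASE.NFO" is expected: the [archive name].nfo match outranks both
        // the unrelated .nfo file and the BBS file_id.diz description.
        fmt.Println(best)
    }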
+func Readme(filename string, files ...string) string { + f := make(Finds) + for _, file := range files { + name := strings.ToLower(file) + base := strings.ToLower(strings.TrimSuffix(filename, filepath.Ext(filename))) + ext := strings.ToLower(filepath.Ext(name)) + switch ext { + case diz, nfo, txt: + // okay + default: + continue + } + f = matchs(file, name, base, f) + } + return f.BestMatch() +} + +func matchs(file, name, base string, f Finds) Finds { + ext := strings.ToLower(filepath.Ext(name)) + switch { + case name == base+nfo: + // [archive name].nfo + f[file] = Lvl1 + case name == base+txt: + // [archive name].txt + f[file] = Lvl2 + case ext == nfo: + // [random].nfo + f[file] = Lvl3 + case name == "file_id.diz": + // BBS file description + f[file] = Lvl4 + case name == base+diz: + // [archive name].diz + f[file] = Lvl5 + case name == txt: + // [random].txt + f[file] = Lvl6 + case name == diz: + // [random].diz + f[file] = Lvl7 + default: + // currently lacking is [group name].nfo and [group name].txt priorities + } + return f +} + +// Usability of search, filename pattern matches. +type Usability uint + +const ( + // Lvl1 is the highest usability. + Lvl1 Usability = iota + 1 + Lvl2 + Lvl3 + Lvl4 + Lvl5 + Lvl6 + Lvl7 + Lvl8 + Lvl9 // Lvl9 is the least usable. +) diff --git a/internal/archive/find_test.go b/internal/archive/find_test.go new file mode 100644 index 00000000..07c86edc --- /dev/null +++ b/internal/archive/find_test.go @@ -0,0 +1,26 @@ +package archive_test + +import ( + "testing" + + "github.com/Defacto2/server/internal/archive" + "github.com/Defacto2/server/internal/helper" + "github.com/stretchr/testify/assert" +) + +func TestReadme(t *testing.T) { + + s := archive.Readme("") + assert.Empty(t, s) + + dir := td("uncompress") + files, err := helper.Files(dir) + assert.NoError(t, err) + assert.Equal(t, 16, len(files)) + + s = archive.Readme("", files...) + assert.Equal(t, "TEST.NFO", s) + + s = archive.Readme("TEST.ZIP", files...) + assert.Equal(t, "TEST.NFO", s) +} diff --git a/internal/archive/internal/internal.go b/internal/archive/internal/internal.go new file mode 100644 index 00000000..72c14065 --- /dev/null +++ b/internal/archive/internal/internal.go @@ -0,0 +1,46 @@ +package internal + +import ( + "strconv" + "strings" +) + +// ArjItem returns true if the string is a row from the [arj program] list command. +// +// [arj program]: https://arj.sourceforge.net/ +func ARJItem(s string) bool { + const minLen = 6 + if len(s) < minLen { + return false + } + if s[3:4] != ")" { + return false + } + x := s[:3] + if _, err := strconv.Atoi(x); err != nil { + return false + } + return true +} + +// MagicLHA returns true if the LHA file type is matched in the magic string. 
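Because ARJItem and MagicLHA now live in the nested internal package, only code under internal/archive can reach them. A brief sketch of the inputs they are meant to recognise; the sample strings are illustrative rather than captured program output, and the caller is assumed to sit inside the internal/archive tree:

    // A hypothetical caller that must live under internal/archive to satisfy
    // Go's internal package import rule.
    package archive

    import (
        "fmt"

        "github.com/Defacto2/server/internal/archive/internal"
    )

    func demoHelpers() {
        // A row from an `arj l` listing: a three-digit index, a closing
        // parenthesis, then the filename.
        fmt.Println(internal.ARJItem("001) FILE_ID.DIZ")) // true
        fmt.Println(internal.ARJItem("filename"))         // false

        // Magic strings as they look after MagicExt lowercases and trims them.
        fmt.Println(internal.MagicLHA("lha archive data"))  // true
        fmt.Println(internal.MagicLHA("lharc 1.x archive")) // true
        fmt.Println(internal.MagicLHA("zip archive data"))  // false
    }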
+func MagicLHA(magic string) bool { + s := strings.Split(magic, " ") + const lha, lharc = "lha", "lharc" + if s[0] == lharc { + return true + } + if s[0] != lha { + return false + } + if len(s) < len(lha) { + return false + } + if strings.Join(s[0:3], " ") == "lha archive data" { + return true + } + if strings.Join(s[2:4], " ") == "archive data" { + return true + } + return false +} diff --git a/internal/helper/get.go b/internal/helper/get.go index 88415a5a..6c4866ee 100644 --- a/internal/helper/get.go +++ b/internal/helper/get.go @@ -25,6 +25,10 @@ func Redirect(rawURL string) string { if err != nil { return rawURL } + if u.Host == "scene.org" && u.Path == "/file.php" { + // match broken legacy URLs: http://scene.org/file.php?id=299790 + return rawURL + } if u.Host == "files.scene.org" { p := u.Path x := strings.Split(p, "/") diff --git a/internal/helper/read.go b/internal/helper/read.go index e5fe8c12..8c8a95ba 100644 --- a/internal/helper/read.go +++ b/internal/helper/read.go @@ -8,6 +8,29 @@ import ( "os" ) +// Files returns the filenames in the given directory. +func Files(dir string) ([]string, error) { + st, err := os.Stat(dir) + if err != nil { + return nil, err + } + if !st.IsDir() { + return nil, fmt.Errorf("%w: %s", ErrDirPath, dir) + } + files, err := os.ReadDir(dir) + if err != nil { + return nil, err + } + var names []string + for _, file := range files { + if file.IsDir() { + continue + } + names = append(names, file.Name()) + } + return names, nil +} + // Lines returns the number of lines in the named file. func Lines(name string) (int, error) { file, err := os.Open(name) diff --git a/internal/zoo/zoo.go b/internal/zoo/zoo.go index 3e83f5ed..baed6566 100644 --- a/internal/zoo/zoo.go +++ b/internal/zoo/zoo.go @@ -1,5 +1,8 @@ -// Package zoo provides data about releasers and groups on the Demozoo website. -// https://demozoo.org +// Package zoo handles the retrieval of [production records] from the +// [Demozoo] API and the extraction of relevant data for the Defacto2 website. +// +// [production records]: https://demozoo.org/api/v1/productions/ +// [Demozoo]: https://demozoo.org package zoo import ( @@ -9,6 +12,7 @@ import ( "fmt" "io" "net/http" + "net/url" "strconv" "time" @@ -21,6 +25,9 @@ const ( Timeout = 5 * time.Second ) +// Demozoo is a production record from the Demozoo API. +// Only the fields required for the Defacto2 website are included, +// with everything else being ignored. type Demozoo struct { // Title is the production title. Title string `json:"title"` @@ -50,6 +57,11 @@ type Demozoo struct { LinkClass string `json:"link_class"` URL string `json:"url"` } `json:"download_links"` + // ExternalLinks links to the remotely hosted files. + ExternalLinks []struct { + LinkClass string `json:"link_class"` + URL string `json:"url"` + } `json:"external_links"` // ID is the production ID. ID int `json:"id"` } @@ -60,7 +72,11 @@ var ( ErrStatus = errors.New("demozoo production status is not ok") ) -// Get returns the production data from the Demozoo API. +// Get requests data for a production record from the [Demozoo API]. +// It returns an error if the production ID is invalid, when the request +// reaches a [Timeout] or fails. +// +// [Demozoo API]: https://demozoo.org/api/v1/productions/ func (d *Demozoo) Get(id int) error { if id < 1 { return fmt.Errorf("%w: %d", ErrID, id) @@ -97,6 +113,86 @@ func (d *Demozoo) Get(id int) error { return nil } +// GithubRepo returns the Github repository path of the production using +// the Demozoo struct. 
It searches the external links for a link class that +// matches GithubRepo. +func (d Demozoo) GithubRepo() string { + for _, link := range d.ExternalLinks { + if link.LinkClass != "GithubRepo" { + continue + } + url, err := url.Parse(link.URL) + if err != nil { + continue + } + if url.Host != "github.com" { + continue + } + return url.Path + } + return "" +} + +// PouetProd returns the Pouet ID of the production using +// the Demozoo struct. It searches the external links for a +// link class that matches PouetProduction. +// A 0 is returned whenever the production does not have a recognized +// Pouet production link. +func (d Demozoo) PouetProd() int { + for _, link := range d.ExternalLinks { + if link.LinkClass != "PouetProduction" { + continue + } + url, err := url.Parse(link.URL) + if err != nil { + continue + } + id, err := strconv.Atoi(url.Query().Get("which")) + if err != nil { + continue + } + return id + } + return 0 +} + +// Unmarshal parses the JSON-encoded data and stores the result +// in the Demozoo production struct. It returns an error if the JSON data is +// invalid or the production ID is invalid. +func (d *Demozoo) Unmarshal(data []byte) error { + if err := json.Unmarshal(data, &d); err != nil { + return err + } + if d.ID < 1 { + return fmt.Errorf("%w: %d", ErrID, d.ID) + } + return nil +} + +// YouTubeVideo returns the ID of a video on YouTube. It searches the external links +// for a link class that matches YoutubeVideo. +// An empty string is returned whenever the production does not have a recognized +// YouTube video link. +func (d Demozoo) YouTubeVideo() string { + for _, link := range d.ExternalLinks { + if link.LinkClass != "YoutubeVideo" { + continue + } + url, err := url.Parse(link.URL) + if err != nil { + continue + } + if url.Host != "youtube.com" && url.Host != "www.youtube.com" { + continue + } + if url.Path != "/watch" { + continue + } + return url.Query().Get("v") + } + return "" +} + // URI is a the URL slug of the releaser. 
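Taken together, Unmarshal and the three accessors can be exercised without touching the network. A hedged sketch using a hand-written fragment; the URLs mirror the examples removed from context.go and reused in the tests, and the id value only exists to satisfy the ID check:

    package main

    import (
        "fmt"
        "log"

        "github.com/Defacto2/server/internal/zoo"
    )

    func main() {
        data := []byte(`{
            "id": 1,
            "external_links": [
                {"link_class": "GithubRepo", "url": "https://github.com/Defacto2/server"},
                {"link_class": "PouetProduction", "url": "https://www.pouet.net/prod.php?which=71562"},
                {"link_class": "YoutubeVideo", "url": "https://www.youtube.com/watch?v=x6QrKsBOERA"}
            ]
        }`)
        var d zoo.Demozoo
        if err := d.Unmarshal(data); err != nil {
            log.Fatal(err)
        }
        fmt.Println(d.GithubRepo())   // "/Defacto2/server"
        fmt.Println(d.PouetProd())    // 71562
        fmt.Println(d.YouTubeVideo()) // "x6QrKsBOERA"
    }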
type URI string diff --git a/internal/zoo/zoo_test.go b/internal/zoo/zoo_test.go index 60f850dc..581b610d 100644 --- a/internal/zoo/zoo_test.go +++ b/internal/zoo/zoo_test.go @@ -35,3 +35,56 @@ func TestFind(t *testing.T) { prod = zoo.Find("notfound") assert.Equal(t, prod, zoo.GroupID(0)) } + +func TestExternalLinks(t *testing.T) { + t.Parallel() + d := zoo.Demozoo{} + assert.Equal(t, 0, d.PouetProd()) + + d.ExternalLinks = append(d.ExternalLinks, struct { + LinkClass string `json:"link_class"` + URL string `json:"url"` + }{ + LinkClass: "class1", + URL: "http://example.com/1", + }) + assert.Equal(t, 0, d.PouetProd()) + + d.ExternalLinks = append(d.ExternalLinks, struct { + LinkClass string `json:"link_class"` + URL string `json:"url"` + }{ + LinkClass: "PouetProduction", + URL: "http://example.com/1", + }) + assert.Equal(t, 0, d.PouetProd()) + + d.ExternalLinks = append(d.ExternalLinks, struct { + LinkClass string `json:"link_class"` + URL string `json:"url"` + }{ + LinkClass: "PouetProduction", + URL: "https://www.pouet.net/prod.php?which=71562", + }) + assert.Equal(t, 71562, d.PouetProd()) + assert.Empty(t, d.GithubRepo()) + + d.ExternalLinks = append(d.ExternalLinks, struct { + LinkClass string `json:"link_class"` + URL string `json:"url"` + }{ + LinkClass: "GithubRepo", + URL: "https://github.com/Defacto2/server", + }) + assert.Equal(t, "/Defacto2/server", d.GithubRepo()) + + assert.Equal(t, "", d.YouTubeVideo()) + d.ExternalLinks = append(d.ExternalLinks, struct { + LinkClass string `json:"link_class"` + URL string `json:"url"` + }{ + LinkClass: "YoutubeVideo", + URL: "https://www.youtube.com/watch?v=x6QrKsBOERA", + }) + assert.Equal(t, "x6QrKsBOERA", d.YouTubeVideo()) +}
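For completeness, a hedged sketch of the live path the handler relies on, from fetching a production record to reading its external links. Production ID 1 is only a placeholder, the request needs network access, and it is bounded by the package Timeout:

    package main

    import (
        "fmt"
        "log"

        "github.com/Defacto2/server/internal/zoo"
    )

    func main() {
        var rec zoo.Demozoo
        // Requests https://demozoo.org/api/v1/productions/1/ (placeholder ID).
        if err := rec.Get(1); err != nil {
            log.Fatal(err)
        }
        fmt.Println(rec.Title)
        fmt.Println(rec.GithubRepo(), rec.PouetProd(), rec.YouTubeVideo())
    }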