From 53929c7ddef56220201f69cb4debd325f0617da4 Mon Sep 17 00:00:00 2001 From: Ben Garrett Date: Tue, 18 Jun 2024 19:34:54 +1000 Subject: [PATCH] artifact readme supports utf-8 encoding. --- docs/todo.md | 2 +- handler/app/dirs.go | 44 +++++++++++++++++++++++++++----------- internal/helper/helper.go | 4 +++- internal/render/render.go | 5 +++++ view/app/artifactlock.tmpl | 4 ++-- 5 files changed, 43 insertions(+), 16 deletions(-) diff --git a/docs/todo.md b/docs/todo.md index 5890380a..ab016981 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -57,7 +57,7 @@ - Unicode single byte: http://localhost:1323/f/a5191c3,http://localhost:1323/v/ab1fc8b,http://localhost:1323/f/b61f24f, - Text viewer attempting to preview PNG image due to category: http://localhost:1323/f/af20fcb, - Maximum download permitted, 1GB: http://localhost:1323/f/aa256f1, -- Emulator, .exe file: http://localhost:1323/f/b23b9c,http://localhost:1323/f/ad18cf0,http://localhost:1323/f/b030891, +- Emulator, .exe file: http://localhost:1323/f/b23b9c,http://localhost:1323/f/ad18cf0,http://localhost:1323/f/b030891,http://localhost:1323/f/b33057 - Emulator, unsupported .arj: http://localhost:1323/f/ad18c43,http://localhost:1323/f/ae1c87b,http://localhost:1323/f/b41eefb, - Emulator, unsupported .arc: http://localhost:1323/f/a418d56, - Emulator, unsupported .lha: http://localhost:1323/f/a724be8, diff --git a/handler/app/dirs.go b/handler/app/dirs.go index 384c8696..ff840eb9 100644 --- a/handler/app/dirs.go +++ b/handler/app/dirs.go @@ -37,6 +37,7 @@ import ( "golang.org/x/exp/slices" _ "golang.org/x/image/webp" // webp format decoder "golang.org/x/text/encoding/charmap" + "golang.org/x/text/encoding/unicode" ) // Dirs contains the directories used by the artifact pages. @@ -414,7 +415,8 @@ func (dir Dirs) artifactReadme(art *models.File) (map[string]interface{}, error) nbsp437 = 0xff // non-breaking space for CP437 space = " " // intentional space ) - switch render.Encoder(art, bytes.NewReader(b)) { + textEncoding := render.Encoder(art, bytes.NewReader(b)) + switch textEncoding { case charmap.ISO8859_1: data["readmeLatin1Cls"] = "" data["readmeCP437Cls"] = "d-none" + space @@ -426,19 +428,37 @@ func (dir Dirs) artifactReadme(art *models.File) (map[string]interface{}, error) data["readmeCP437Cls"] = "" data["vgaCheck"] = "checked" b = bytes.ReplaceAll(b, []byte{nbsp437}, []byte{sp}) + case unicode.UTF8: + // use Cad font as default + data["readmeLatin1Cls"] = "d-none" + space + data["readmeCP437Cls"] = "" + data["vgaCheck"] = "checked" } - d := charmap.ISO8859_1.NewDecoder().Reader(bytes.NewReader(b)) - readme, err := decode(d) - if err != nil { - return data, fmt.Errorf("iso8859_1 decode: %w", err) - } - data["readmeLatin1"] = readme - d = charmap.CodePage437.NewDecoder().Reader(bytes.NewReader(b)) - readme, err = decode(d) - if err != nil { - return data, fmt.Errorf("codepage437 decode: %w", err) + readme := "" + switch textEncoding { + case unicode.UTF8: + // unicode should apply to both latin1 and cp437 + readme, err = decode(bytes.NewReader(b)) + if err != nil { + return data, fmt.Errorf("unicode utf8 decode: %w", err) + } + data["readmeLatin1"] = readme + data["readmeCP437"] = readme + default: + d := charmap.ISO8859_1.NewDecoder().Reader(bytes.NewReader(b)) + readme, err = decode(d) + if err != nil { + return data, fmt.Errorf("iso8859_1 decode: %w", err) + } + data["readmeLatin1"] = readme + d = charmap.CodePage437.NewDecoder().Reader(bytes.NewReader(b)) + readme, err = decode(d) + if err != nil { + return data, fmt.Errorf("codepage437 decode: %w", err) + } + data["readmeCP437"] = readme } - data["readmeCP437"] = readme + data["readmeLines"] = strings.Count(readme, "\n") data["readmeRows"] = helper.MaxLineLength(readme) return data, nil diff --git a/internal/helper/helper.go b/internal/helper/helper.go index ab7f9b9f..4c4677a6 100644 --- a/internal/helper/helper.go +++ b/internal/helper/helper.go @@ -19,6 +19,7 @@ import ( "golang.org/x/text/encoding" "golang.org/x/text/encoding/charmap" + "golang.org/x/text/encoding/unicode" ) const ( @@ -126,6 +127,7 @@ func Determine(reader io.Reader) encoding.Encoding { if err != nil { return nil } + for _, char := range p { r := rune(char) switch { @@ -157,7 +159,7 @@ func Determine(reader io.Reader) encoding.Encoding { // The maximum value of an 8-bit character is 255 (0xff), // so rune valud above that, 256+ (0x100) is a Unicode multi-byte character, // which we can assume to be UTF-8. - return nil + return unicode.UTF8 } } return sequences(p) diff --git a/internal/render/render.go b/internal/render/render.go index 14573ce4..85200ef1 100644 --- a/internal/render/render.go +++ b/internal/render/render.go @@ -15,6 +15,7 @@ import ( "github.com/Defacto2/server/internal/postgres/models" "golang.org/x/text/encoding" "golang.org/x/text/encoding/charmap" + "golang.org/x/text/encoding/unicode" ) var ( @@ -44,6 +45,10 @@ func Encoder(art *models.File, r io.Reader) encoding.Encoding { return charmap.ISO8859_1 } } + magic := strings.ToLower(strings.TrimSpace(art.FileMagicType.String)) + if strings.Contains(magic, "utf-8") { + return unicode.UTF8 + } return helper.Determine(r) } diff --git a/view/app/artifactlock.tmpl b/view/app/artifactlock.tmpl index 3e721a6d..8dd2a0a1 100644 --- a/view/app/artifactlock.tmpl +++ b/view/app/artifactlock.tmpl @@ -15,10 +15,10 @@
{{- if $disableRecord}} -

❌ Record is disabled and hidden from public access. ❌

+

🚫 Record is disabled and hidden from public access. 🚫

{{- else if $forApproval}}

- ❓ This new record is hidden from public access. ❓
+ ⛔ This new record is hidden from public access.
{{- /* todo: create a htmx element to approve and refresh */}} If the data and images are correct, it can be approved.