diff --git a/go.mod b/go.mod index a6d4d964..b8152d60 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/errata-ai/ini v1.63.0 github.com/errata-ai/regexp2 v1.7.0 github.com/gobwas/glob v0.2.3 + github.com/jdkato/go-tree-sitter-julia v0.1.0 github.com/jdkato/twine v0.10.1 github.com/karrick/godirwalk v1.16.1 github.com/mholt/archiver/v3 v3.5.1 @@ -23,6 +24,7 @@ require ( github.com/remeh/sizedwaitgroup v1.0.0 github.com/smacker/go-tree-sitter v0.0.0-20240514083259-c5d1f3f5f99e github.com/spf13/pflag v1.0.5 + github.com/stretchr/testify v1.8.4 github.com/yuin/goldmark v1.5.6 golang.org/x/exp v0.0.0-20231006140011-7918f672742d golang.org/x/net v0.23.0 @@ -38,13 +40,13 @@ require ( github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/andybalholm/brotli v1.0.1 // indirect github.com/containerd/console v1.0.3 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.1.1 // indirect github.com/gookit/color v1.5.4 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/imdario/mergo v0.3.11 // indirect - github.com/jdkato/go-tree-sitter-julia v0.1.0 // indirect github.com/klauspost/compress v1.11.4 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/kr/pretty v0.3.0 // indirect @@ -55,6 +57,7 @@ require ( github.com/montanaflynn/stats v0.7.1 // indirect github.com/nwaples/rardecode v1.1.0 // indirect github.com/pierrec/lz4/v4 v4.1.2 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.4.4 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/spf13/cast v1.3.1 // indirect @@ -65,4 +68,5 @@ require ( golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect gopkg.in/neurosnap/sentences.v1 v1.0.7 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index f284f1bb..328d74ad 
100644 --- a/go.sum +++ b/go.sum @@ -63,8 +63,6 @@ github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4 github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/jdkato/go-tree-sitter-julia v0.0.0-20240531060609-b738d045ba2d h1:nc/Dgjp4Zr3drV44bz2+fUaCb1ZZvFtNnudyuaWL7uQ= -github.com/jdkato/go-tree-sitter-julia v0.0.0-20240531060609-b738d045ba2d/go.mod h1:lXNEZorcvU63DcANEklLMbDRjwam4VQ44MIV1Cck0w8= github.com/jdkato/go-tree-sitter-julia v0.1.0 h1:z+6zTbd6PHMKAge7GJx9QIwPQX2NOKb4Pj5jteJvaYY= github.com/jdkato/go-tree-sitter-julia v0.1.0/go.mod h1:lXNEZorcvU63DcANEklLMbDRjwam4VQ44MIV1Cck0w8= github.com/jdkato/twine v0.10.1 h1:Jexy1dua9nRyr45AQ3Bml1nCVYq3VIi9g09MOkg2Wwk= diff --git a/internal/core/config.go b/internal/core/config.go index 6faecdb6..63ef4f14 100644 --- a/internal/core/config.go +++ b/internal/core/config.go @@ -175,27 +175,28 @@ type CLIFlags struct { // Config holds the configuration values from both the CLI and `.vale.ini`. 
type Config struct { // General configuration - BlockIgnores map[string][]string // A list of blocks to ignore - Checks []string // All checks to load - Formats map[string]string // A map of unknown -> known formats - Asciidoctor map[string]string // A map of asciidoctor attributes - FormatToLang map[string]string // A map of format to lang ID - GBaseStyles []string // Global base style - GChecks map[string]bool // Global checks - IgnoredClasses []string // A list of HTML classes to ignore - IgnoredScopes []string // A list of HTML tags to ignore - MinAlertLevel int // Lowest alert level to display - Vocab []string // The active project - RuleToLevel map[string]string // Single-rule level changes - SBaseStyles map[string][]string // Syntax-specific base styles - SChecks map[string]map[string]bool // Syntax-specific checks - SkippedScopes []string // A list of HTML blocks to ignore - Stylesheets map[string]string // XSLT stylesheet - TokenIgnores map[string][]string // A list of tokens to ignore - WordTemplate string // The template used in YAML -> regexp list conversions - RootINI string // the path to the project's .vale.ini file - Paths []string // A list of paths to search for styles - ConfigFiles []string // A list of configuration files to load + BlockIgnores map[string][]string // A list of blocks to ignore + Checks []string // All checks to load + Formats map[string]string // A map of unknown -> known formats + Asciidoctor map[string]string // A map of asciidoctor attributes + FormatToLang map[string]string // A map of format to lang ID + GBaseStyles []string // Global base style + GChecks map[string]bool // Global checks + IgnoredClasses []string // A list of HTML classes to ignore + IgnoredScopes []string // A list of HTML tags to ignore + MinAlertLevel int // Lowest alert level to display + Vocab []string // The active project + RuleToLevel map[string]string // Single-rule level changes + SBaseStyles map[string][]string // Syntax-specific base styles + 
SChecks map[string]map[string]bool // Syntax-specific checks + SkippedScopes []string // A list of HTML blocks to ignore + Stylesheets map[string]string // XSLT stylesheet + TokenIgnores map[string][]string // A list of tokens to ignore + CommentDelimiters map[string][2]string // Strings to treat as comment delimiters. Indicates the start and end delimiters. + WordTemplate string // The template used in YAML -> regexp list conversions + RootINI string // the path to the project's .vale.ini file + Paths []string // A list of paths to search for styles + ConfigFiles []string // A list of configuration files to load AcceptedTokens []string `json:"-"` // Project-specific vocabulary (okay) RejectedTokens []string `json:"-"` // Project-specific vocabulary (avoid) @@ -229,6 +230,7 @@ func NewConfig(flags *CLIFlags) (*Config, error) { cfg.SecToPat = make(map[string]glob.Glob) cfg.Stylesheets = make(map[string]string) cfg.TokenIgnores = make(map[string][]string) + cfg.CommentDelimiters = make(map[string][2]string) cfg.FormatToLang = make(map[string]string) cfg.Paths = []string{} cfg.ConfigFiles = []string{} diff --git a/internal/core/ini.go b/internal/core/ini.go index ffdb6597..b2a4d9eb 100644 --- a/internal/core/ini.go +++ b/internal/core/ini.go @@ -110,6 +110,20 @@ var syntaxOpts = map[string]func(string, *ini.Section, *Config) error{ cfg.BlockIgnores[label] = mergeValues(sec.Key("BlockIgnores").StringsWithShadows(",")) return nil }, + "CommentDelimiters": func(label string, sec *ini.Section, cfg *Config) error { //nolint:unparam + d := mergeValues(sec.Key("CommentDelimiters").StringsWithShadows(",")) + if len(d) != 2 { + return NewE201FromTarget( + fmt.Sprintf("CommentDelimiters must be a comma-separated list of two delimiters, but got %v items", len(d)), + label, + cfg.Flags.Path) + } + var c [2]string + c[0], c[1] = d[0], d[1] + cfg.CommentDelimiters[label] = c + return nil + + }, "TokenIgnores": func(label string, sec *ini.Section, cfg *Config) error { 
//nolint:unparam cfg.TokenIgnores[label] = mergeValues(sec.Key("TokenIgnores").StringsWithShadows(",")) return nil diff --git a/internal/core/ini_test.go b/internal/core/ini_test.go new file mode 100644 index 00000000..088aa094 --- /dev/null +++ b/internal/core/ini_test.go @@ -0,0 +1,96 @@ +package core + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_processConfig_commentDelimiters(t *testing.T) { + cases := []struct { + description string + body string + expected map[string][2]string + }{ + { + description: "custom comment delimiters for markdown", + body: `[*.md] +CommentDelimiters = "{/*,*/}" +`, + expected: map[string][2]string{ + "*.md": [2]string{"{/*", "*/}"}, + }, + }, + { + description: "not set", + body: `[*.md] +TokenIgnores = (\$+[^\n$]+\$+) +`, + expected: map[string][2]string{}, + }, + } + + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + uCfg, err := shadowLoad([]byte(c.body)) + assert.NoError(t, err) + conf, err := NewConfig(&CLIFlags{}) + assert.NoError(t, err) + _, err = processConfig(uCfg, conf, false) + assert.NoError(t, err) + actual := conf.CommentDelimiters + assert.Equal(t, c.expected, actual) + }) + } +} + +func Test_processConfig_commentDelimiters_error(t *testing.T) { + cases := []struct { + description string + body string + expectedErr string + }{ + { + description: "global custom comment delimiters", + body: `[*] +CommentDelimiters = "{/*,*/}" +`, + expectedErr: "syntax-specific option", + }, + { + description: "more than two delimiters", + body: `[*.md] +CommentDelimiters = "{/*,*/},<<,>>" +`, + expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 4 items", + }, + { + description: "more than two delimiters (shadow)", + body: `[*.md] +CommentDelimiters = "{/*,*/}" + +[*.md] +CommentDelimiters = "<<,>>" +`, + expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 4 items", + }, + { + description: "one 
delimiter is empty", + body: `[*.md] +CommentDelimiters = "{/*" +`, + expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 1 items", + }, + } + + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + uCfg, err := shadowLoad([]byte(c.body)) + assert.NoError(t, err) + conf, err := NewConfig(&CLIFlags{}) + assert.NoError(t, err) + _, err = processConfig(uCfg, conf, false) + assert.ErrorContains(t, err, c.expectedErr) + }) + } +} diff --git a/internal/lint/html.go b/internal/lint/html.go index 6dc73d79..6c9bcc06 100755 --- a/internal/lint/html.go +++ b/internal/lint/html.go @@ -24,25 +24,40 @@ func (l *Linter) lintHTML(f *core.File) error { return l.lintHTMLTokens(f, []byte(f.Content), 0) } -func (l *Linter) applyPatterns(f *core.File, block, inline string) (string, error) { +type extensionConfig struct { + Normed, Real string +} + +var blockDelimiters map[string]string = map[string]string{ + ".adoc": "\n----\n$1\n----\n", + ".md": "\n```\n$1\n```\n", + ".rst": "\n::\n\n%s\n", + ".org": orgExample, +} + +func applyBlockPatterns(c *core.Config, exts extensionConfig, content string) (string, error) { + block, ok := blockDelimiters[exts.Normed] + if !ok { + return content, fmt.Errorf("ignore patterns are not supported in '%s' files", exts.Normed) + } + // TODO: Should we assume this? 
- s := reFrontMatter.ReplaceAllString(f.Content, block) + s := reFrontMatter.ReplaceAllString(content, block) - exts := []string{f.NormedExt, f.RealExt} - for syntax, regexes := range l.Manager.Config.BlockIgnores { + for syntax, regexes := range c.BlockIgnores { sec, err := glob.Compile(syntax) if err != nil { return s, err - } else if sec.MatchAny(exts) { + } else if sec.Match(exts.Normed) || sec.Match(exts.Real) { for _, r := range regexes { pat, errc := regexp2.CompileStd(r) if errc != nil { //nolint:gocritic return s, core.NewE201FromTarget( errc.Error(), r, - l.Manager.Config.Flags.Path, + c.Flags.Path, ) - } else if strings.HasSuffix(f.NormedExt, ".rst") { + } else if strings.HasSuffix(exts.Normed, ".rst") { // HACK: We need to add padding for the literal block. for _, c := range pat.FindAllStringSubmatch(s, -1) { sec := fmt.Sprintf(block, core.Indent(c[0], " ")) @@ -54,39 +69,98 @@ func (l *Linter) applyPatterns(f *core.File, block, inline string) (string, erro return s, core.NewE201FromTarget( err.Error(), r, - l.Manager.Config.Flags.Path, + c.Flags.Path, ) } } } } } + return s, nil +} + +var inlineDelimiters map[string]string = map[string]string{ + ".adoc": "`$1`", + ".md": "`$1`", + ".rst": "``$1``", + ".org": "=$1=", +} + +func applyInlinePatterns(c *core.Config, exts extensionConfig, content string) (string, error) { + inline, ok := inlineDelimiters[exts.Normed] + if !ok { + return content, fmt.Errorf("ignore patterns are not supported in '%s' files", exts.Normed) + } - for syntax, regexes := range l.Manager.Config.TokenIgnores { + for syntax, regexes := range c.TokenIgnores { sec, err := glob.Compile(syntax) if err != nil { - return s, err - } else if sec.MatchAny(exts) { + return content, err + } else if sec.Match(exts.Normed) || sec.Match(exts.Real) { for _, r := range regexes { pat, errc := regexp2.CompileStd(r) if errc != nil { - return s, core.NewE201FromTarget( + return content, core.NewE201FromTarget( errc.Error(), r, - 
l.Manager.Config.Flags.Path, + c.Flags.Path, ) } - s, err = pat.Replace(s, inline, 0, -1) + content, err = pat.Replace(content, inline, 0, -1) if err != nil { - return s, core.NewE201FromTarget( + return content, core.NewE201FromTarget( err.Error(), r, - l.Manager.Config.Flags.Path, + c.Flags.Path, ) } } } } + return content, nil +} + +// applyCommentPatterns rewrites the custom comment delimiters configured for every section glob +// that matches exts.Normed or exts.Real into HTML comment tags (start and end delimiter alike), so +// comment-based controls can use those delimiters. It errors when only one delimiter of a pair is set. +func applyCommentPatterns(c *core.Config, exts extensionConfig, content string) (string, error) { + for syntax, delims := range c.CommentDelimiters { + sec, err := glob.Compile(syntax) + if err != nil { + return content, err + } else if sec.Match(exts.Normed) || sec.Match(exts.Real) { + // Nothing assigned for this section: skip it, because map order is random and another section may still match. + if delims[0] == "" && delims[1] == "" { + continue + } + // Return an error if only one delimiter is configured + if (delims[0] == "" && delims[1] != "") || (delims[0] != "" && delims[1] == "") { + return content, fmt.Errorf("CommentDelimiters must be empty or have two values") + } + + content = strings.ReplaceAll(content, delims[0], "<!--") + content = strings.ReplaceAll(content, delims[1], "-->") + + } + } + return content, nil +} + +func applyPatterns(c *core.Config, exts extensionConfig, content string) (string, error) { + s, err := applyBlockPatterns(c, exts, content) + if err != nil { + return s, err + } + + s, err = applyInlinePatterns(c, exts, s) + if err != nil { + return s, err + } + + s, err = applyCommentPatterns(c, exts, s) + if err != nil { + return s, err + } return s, nil } diff --git a/internal/lint/html_test.go b/internal/lint/html_test.go new file mode 100644 index 00000000..49b318f7 --- /dev/null +++ b/internal/lint/html_test.go @@ -0,0 +1,167 @@ +package lint + +import ( + "strings" + "testing" + + "github.com/errata-ai/vale/v3/internal/core" + "github.com/stretchr/testify/assert" +) + +func Test_applyPatterns(t 
*testing.T) { + cases := []struct { + description string + conf core.Config + exts extensionConfig + content string + expected string + }{ + { + description: "MDX comment in markdown, custom comment delimiter", + conf: core.Config{ + CommentDelimiters: map[string][2]string{ + ".md": [2]string{"{/*", "*/}"}, + }, + }, + exts: extensionConfig{".md", ".md"}, + content: `--- +title: Example page +description: Example page +--- + +This is the intro pagragraph. + +{/* This is a comment */} +`, + expected: strings.ReplaceAll(` +@@@ +title: Example page +description: Example page +@@@ + + +This is the intro pagragraph. + +<!-- This is a comment --> +`, "@", "`"), + }, + { + description: "MDX comment in markdown, no custom comment delimiter", + conf: core.Config{}, + exts: extensionConfig{".md", ".md"}, + content: `--- +title: Example page +description: Example page +--- + +This is the intro pagragraph. + +{/* This is a comment */} +`, + expected: strings.ReplaceAll(` +@@@ +title: Example page +description: Example page +@@@ + + +This is the intro pagragraph. + +{/* This is a comment */} +`, "@", "`"), + }, + { + description: "multiline MDX comment in markdown, custom comment delimiter", + conf: core.Config{ + CommentDelimiters: map[string][2]string{ + ".md": [2]string{"{/*", "*/}"}, + }, + }, + exts: extensionConfig{".md", ".md"}, + content: `--- +title: Example page +description: Example page +--- + +This is the intro pagragraph. + +{/* +This is a comment +*/} +`, + expected: strings.ReplaceAll(` +@@@ +title: Example page +description: Example page +@@@ + + +This is the intro pagragraph. 
+ +<!-- +This is a comment +--> +`, "@", "`"), + }, + { + description: "token ignore in cc file", + content: "Call \\c func to start the process.", + conf: core.Config{ + TokenIgnores: map[string][]string{ + "*.cc": []string{`(\\c \w+)`}, + }, + Formats: map[string]string{ + "cc": "md", + }, + }, + exts: extensionConfig{".md", ".cc"}, + expected: "Call `\\c func` to start the process.", + }, + } + + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + s, err := applyPatterns(&c.conf, c.exts, c.content) + assert.NoError(t, err) + assert.Equal(t, c.expected, s) + }) + } +} + +func Test_applyPatterns_errors(t *testing.T) { + cases := []struct { + description string + conf core.Config + exts extensionConfig + content string + expectedErr string + }{ + { + description: "only one delimiter", + conf: core.Config{ + CommentDelimiters: map[string][2]string{ + ".md": [2]string{"{/*", ""}, + }, + }, + exts: extensionConfig{".md", ".md"}, + content: `--- +title: Example page +description: Example page +--- + +This is the intro pagragraph. + +{/* This is a comment */} +`, + expectedErr: "CommentDelimiters must be empty or have two values", + }, + } + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + _, err := applyPatterns(&c.conf, c.exts, c.content) + assert.ErrorContains(t, err, c.expectedErr) + }) + } +} + +// TODO: Test for expected errors resulting from applyPatterns diff --git a/internal/lint/lint.go b/internal/lint/lint.go index e2c85847..66c9a5b6 100755 --- a/internal/lint/lint.go +++ b/internal/lint/lint.go @@ -2,7 +2,6 @@ package lint import ( "errors" - "fmt" "net/http" "os" "path/filepath" @@ -56,18 +55,12 @@ func NewLinter(cfg *core.Config) (*Linter, error) { // Transformations include block and token ignores, as well as some built-in // replacements. 
func (l *Linter) Transform(f *core.File) (string, error) { - switch f.NormedExt { - case ".adoc": - return l.applyPatterns(f, "\n----\n$1\n----\n", "`$1`") - case ".md": - return l.applyPatterns(f, "\n```\n$1\n```\n", "`$1`") - case ".rst": - return l.applyPatterns(f, "\n::\n\n%s\n", "``$1``") - case ".org": - return l.applyPatterns(f, orgExample, "=$1=") - default: - return f.Content, fmt.Errorf("ignore patterns are not supported in '%s' files", f.NormedExt) + exts := extensionConfig{ + Normed: f.NormedExt, + Real: f.RealExt, } + + return applyPatterns(l.Manager.Config, exts, f.Content) } // LintString src according to its format.