chore: get nlp passing new ci config
jdkato committed Aug 28, 2023
1 parent 287ae02 commit 3eb5d4d
Showing 8 changed files with 30 additions and 30 deletions.
2 changes: 1 addition & 1 deletion cmd/vale/command.go
@@ -155,7 +155,7 @@ func runTag(args []string, flags *core.CLIFlags) error {
 	}

 	out := core.TextToContext(
-		string(text), &nlp.NLPInfo{Lang: args[1], Endpoint: args[2]})
+		string(text), &nlp.Info{Lang: args[1], Endpoint: args[2]})

 	return printJSON(out)
 }
4 changes: 2 additions & 2 deletions internal/check/manager.go
@@ -103,8 +103,8 @@ func (mgr *Manager) NeedsTagging() bool {
 }

 // AssignNLP determines what NLP tasks a file needs.
-func (mgr *Manager) AssignNLP(f *core.File) nlp.NLPInfo {
-	return nlp.NLPInfo{
+func (mgr *Manager) AssignNLP(f *core.File) nlp.Info {
+	return nlp.Info{
 		Scope:        f.RealExt,
 		Segmentation: mgr.HasScope("sentence"),
 		Splitting:    mgr.HasScope("paragraph"),
4 changes: 2 additions & 2 deletions internal/core/file.go
@@ -20,7 +20,7 @@ var commentControlRE = regexp.MustCompile(`^vale (.+\..+) = (YES|NO)$`)

 // A File represents a linted text file.
 type File struct {
-	NLP        nlp.NLPInfo  // -
+	NLP        nlp.Info     // -
 	Summary    bytes.Buffer // holds content to be included in summarization checks
 	Alerts     []Alert      // all alerts associated with this file
 	BaseStyles []string     // base style assigned in .vale
@@ -117,7 +117,7 @@ func NewFile(src string, config *Config) (*File, error) {
 		Comments: make(map[string]bool), history: make(map[string]int),
 		simple: config.Flags.Simple, Transform: transform,
 		limits: make(map[string]int), Path: src, Metrics: make(map[string]int),
-		NLP:    nlp.NLPInfo{Endpoint: config.NLPEndpoint, Lang: lang},
+		NLP:    nlp.Info{Endpoint: config.NLPEndpoint, Lang: lang},
 		Lookup: lookup,
 	}
2 changes: 1 addition & 1 deletion internal/core/util.go
@@ -307,7 +307,7 @@ func loadVocab(root string, cfg *Config) error {
 	return err
 }

-func TextToContext(text string, meta *nlp.NLPInfo) []nlp.TaggedWord {
+func TextToContext(text string, meta *nlp.Info) []nlp.TaggedWord {
 	context := []nlp.TaggedWord{}

 	for idx, line := range strings.Split(text, "\n") {
2 changes: 1 addition & 1 deletion internal/nlp/http.go
@@ -20,7 +20,7 @@ type TagResult struct {
 func post(url string) ([]byte, error) {
 	var body []byte

-	resp, err := http.Post(url, "application/x-www-form-urlencoded", nil)
+	resp, err := http.Post(url, "application/x-www-form-urlencoded", nil) //nolint:gosec,noctx
 	if err != nil {
 		return body, err
 	}
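
The added `//nolint:gosec,noctx` directive is part of what gets this package past the new CI config: `gosec` flags `http.Post` with a non-constant URL, and `noctx` flags requests sent without a `context.Context`. For contrast, a hypothetical context-aware variant (a sketch, not what this commit does; `gosec`'s variable-URL warning would still apply) could look like:

```go
package nlp

import (
	"context"
	"io"
	"net/http"
)

// postWithContext is a hypothetical alternative to post(), not part of this
// commit: it threads a context.Context through the request, which would
// satisfy noctx without a directive.
func postWithContext(ctx context.Context, url string) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	return io.ReadAll(resp.Body)
}
```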
2 changes: 1 addition & 1 deletion internal/nlp/prose.go
@@ -52,7 +52,7 @@ func textToWords(text string, nlp bool) []string {
 }

 // TextToTokens converts a string to a slice of tokens.
-func TextToTokens(text string, nlp *NLPInfo) []tag.Token {
+func TextToTokens(text string, nlp *Info) []tag.Token {
 	// Determine if (and how) we need to do POS tagging.
 	if nlp == nil || nlp.Endpoint == "" {
 		// Fall back to our internal library (English-only).
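
So callers that pass a nil `*Info`, or one without an `Endpoint`, never touch the network. A minimal sketch of that fallback, written as package-internal code since `internal/nlp` can't be imported from outside the module (the helper is illustrative, not part of this commit):

```go
package nlp

import "fmt"

// tagOffline is an illustrative helper showing the fallback path: with a nil
// *Info, or one whose Endpoint is empty, TextToTokens uses the bundled
// English-only tagger.
func tagOffline(text string) {
	for _, tok := range TextToTokens(text, nil) {
		// Field names follow the tag.Token type in the signature above;
		// the exact tags depend on the internal model.
		fmt.Println(tok.Text, tok.Tag)
	}
}
```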
10 changes: 5 additions & 5 deletions internal/nlp/provider.go
@@ -35,7 +35,7 @@ func NewBlockWithParent(ctx, txt, sel, parent string) Block {
 }

 // NewLinedBlock creates a Block with an already-known location.
-func NewLinedBlock(ctx, txt, sel string, line int, nlp *NLPInfo) Block {
+func NewLinedBlock(ctx, txt, sel string, line int, _ *Info) Block {
 	if ctx == "" {
 		ctx = txt
 	}
@@ -48,11 +48,11 @@ func NewLinedBlock(ctx, txt, sel string, line int, nlp *NLPInfo) Block {
 		Line: line}
 }

-// NLPInfo handles NLP-related tasks.
+// Info handles NLP-related tasks.
 //
 // Assigning this on a per-file basis allows us to handle multi-language
 // projects -- one file might be `en` while another is `ja`, for example.
-type NLPInfo struct {
+type Info struct {
 	Lang     string // Language of the file.
 	Endpoint string // API endpoint (optional); TODO: should this be per-file?
 	Scope    string // The file's ext scope.
@@ -67,7 +67,7 @@ type NLPInfo struct {
 // The default implementation is the pure-Go prose library, but the goal is to
 // allow (fairly) seamless integration with non-Go libraries too (such as
 // spaCy).
-func (n *NLPInfo) Compute(block *Block) ([]Block, error) {
+func (n *Info) Compute(block *Block) ([]Block, error) {
 	seg := SentenceTokenizer.Tokenize
 	if n.Endpoint != "" && n.Lang != "en" {
 		// We only use external segmentation for non-English text since prose
@@ -83,7 +83,7 @@ func (n *NLPInfo) Compute(block *Block) ([]Block, error) {
 	return n.doNLP(block, seg)
 }

-func (n *NLPInfo) doNLP(blk *Block, seg segmenter) ([]Block, error) {
+func (n *Info) doNLP(blk *Block, seg segmenter) ([]Block, error) {
 	blks := []Block{}

 	ctx := blk.Context
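
Besides removing the `nlp.NLPInfo` stutter at call sites, the rename leaves the per-file design intact. A sketch of that flow, assuming only the signatures visible in this diff (the helper, endpoint URL, and selector are illustrative):

```go
package nlp

// tagJapaneseFile is an illustrative helper built only from the signatures
// shown above. Each file carries its own Info, so this file can be `ja` and
// use an external endpoint while a sibling file is `en` with no endpoint.
func tagJapaneseFile(txt string) ([]Block, error) {
	info := &Info{
		Lang:     "ja",
		Endpoint: "http://127.0.0.1:8000", // assumed local NLP server
	}
	blk := NewLinedBlock("", txt, "text", 1, nil)
	return info.Compute(&blk)
}
```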
34 changes: 17 additions & 17 deletions internal/nlp/tokenize.go
@@ -13,8 +13,8 @@ type Tokenizer interface {
 	Tokenize(string) []string
 }

-// iterTokenizer splits a sentence into words.
-type iterTokenizer struct {
+// IterTokenizer splits a sentence into words.
+type IterTokenizer struct {
 	specialRE    *regexp.Regexp
 	sanitizer    *strings.Replacer
 	contractions []string
@@ -25,67 +25,67 @@ type iterTokenizer struct {
 	isUnsplittable TokenTester
 }

-type TokenizerOptFunc func(*iterTokenizer)
+type TokenizerOptFunc func(*IterTokenizer)

 // UsingIsUnsplittable gives a function that tests whether a token is splittable or not.
 func UsingIsUnsplittable(x TokenTester) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.isUnsplittable = x
 	}
 }

 // UsingSpecialRE sets the provided special regex for unsplittable tokens.
 func UsingSpecialRE(x *regexp.Regexp) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.specialRE = x
 	}
 }

 // UsingSanitizer sets the provided sanitizer.
 func UsingSanitizer(x *strings.Replacer) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.sanitizer = x
 	}
 }

 // UsingSuffixes sets the provided suffixes.
 func UsingSuffixes(x []string) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.suffixes = x
 	}
 }

 // UsingPrefixes sets the provided prefixes.
 func UsingPrefixes(x []string) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.prefixes = x
 	}
 }

 // UsingEmoticons sets the provided map of emoticons.
 func UsingEmoticons(x map[string]int) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.emoticons = x
 	}
 }

 // UsingContractions sets the provided contractions.
 func UsingContractions(x []string) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.contractions = x
 	}
 }

 // UsingSplitCases sets the provided splitCases.
 func UsingSplitCases(x []string) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.splitCases = x
 	}
 }

 // NewIterTokenizer creates a new iterTokenizer.
-func NewIterTokenizer(opts ...TokenizerOptFunc) *iterTokenizer {
-	tok := new(iterTokenizer)
+func NewIterTokenizer(opts ...TokenizerOptFunc) *IterTokenizer {
+	tok := new(IterTokenizer)

 	// Set default parameters
 	tok.emoticons = emoticons
@@ -110,12 +110,12 @@ func addToken(s string, toks []string) []string {
 	return toks
 }

-func (t *iterTokenizer) isSpecial(token string) bool {
+func (t *IterTokenizer) isSpecial(token string) bool {
 	_, found := t.emoticons[token]
 	return found || t.specialRE.MatchString(token) || t.isUnsplittable(token)
 }

-func (t *iterTokenizer) doSplit(token string) []string {
+func (t *IterTokenizer) doSplit(token string) []string {
 	var tokens []string

 	last := 0
@@ -151,7 +151,7 @@ func (t *iterTokenizer) doSplit(token string) []string {
 }

 // Tokenize splits a sentence into a slice of words.
-func (t *iterTokenizer) Tokenize(text string) []string {
+func (t *IterTokenizer) Tokenize(text string) []string {
 	var tokens []string

 	clean, white := t.sanitizer.Replace(text), false
@@ -172,7 +172,7 @@ func (t *iterTokenizer) Tokenize(text string) []string {
 		if toks, found := cache[span]; found {
 			tokens = append(tokens, toks...)
 		} else {
-			toks := t.doSplit(span)
+			toks = t.doSplit(span)
 			cache[span] = toks
 			tokens = append(tokens, toks...)
 		}
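
With `iterTokenizer` exported as `IterTokenizer`, the functional-options constructor is now usable outside the package. A usage sketch (the helper and option values are illustrative, not Vale's defaults):

```go
package nlp

import (
	"regexp"
	"strings"
)

// newCustomTokenizer is illustrative, not part of this commit: each Using*
// helper returns a TokenizerOptFunc, and NewIterTokenizer applies them over
// its defaults.
func newCustomTokenizer() *IterTokenizer {
	return NewIterTokenizer(
		UsingSanitizer(strings.NewReplacer("\u2019", "'")),        // normalize curly apostrophes
		UsingSuffixes([]string{",", ".", "!", "?", ")"}),          // peel these off token ends
		UsingSpecialRE(regexp.MustCompile(`^[A-Z]\.([A-Z]\.)+$`)), // keep acronyms like "U.S." whole
	)
}

// Usage: tokens := newCustomTokenizer().Tokenize("Vale runs in the U.S., too!")
```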
