chore: get nlp passing new ci config
jdkato committed Aug 28, 2023
1 parent 287ae02 commit 3eb5d4d
Showing 8 changed files with 30 additions and 30 deletions.
2 changes: 1 addition & 1 deletion cmd/vale/command.go
@@ -155,7 +155,7 @@ func runTag(args []string, flags *core.CLIFlags) error {
 	}

 	out := core.TextToContext(
-		string(text), &nlp.NLPInfo{Lang: args[1], Endpoint: args[2]})
+		string(text), &nlp.Info{Lang: args[1], Endpoint: args[2]})

 	return printJSON(out)
 }
4 changes: 2 additions & 2 deletions internal/check/manager.go
@@ -103,8 +103,8 @@ func (mgr *Manager) NeedsTagging() bool {
 }

 // AssignNLP determines what NLP tasks a file needs.
-func (mgr *Manager) AssignNLP(f *core.File) nlp.NLPInfo {
-	return nlp.NLPInfo{
+func (mgr *Manager) AssignNLP(f *core.File) nlp.Info {
+	return nlp.Info{
 		Scope:        f.RealExt,
 		Segmentation: mgr.HasScope("sentence"),
 		Splitting:    mgr.HasScope("paragraph"),
4 changes: 2 additions & 2 deletions internal/core/file.go
@@ -20,7 +20,7 @@ var commentControlRE = regexp.MustCompile(`^vale (.+\..+) = (YES|NO)$`)

 // A File represents a linted text file.
 type File struct {
-	NLP        nlp.NLPInfo  // -
+	NLP        nlp.Info     // -
 	Summary    bytes.Buffer // holds content to be included in summarization checks
 	Alerts     []Alert      // all alerts associated with this file
 	BaseStyles []string     // base style assigned in .vale
@@ -117,7 +117,7 @@ func NewFile(src string, config *Config) (*File, error) {
 		Comments: make(map[string]bool), history: make(map[string]int),
 		simple: config.Flags.Simple, Transform: transform,
 		limits: make(map[string]int), Path: src, Metrics: make(map[string]int),
-		NLP:    nlp.NLPInfo{Endpoint: config.NLPEndpoint, Lang: lang},
+		NLP:    nlp.Info{Endpoint: config.NLPEndpoint, Lang: lang},
 		Lookup: lookup,
 	}
2 changes: 1 addition & 1 deletion internal/core/util.go
@@ -307,7 +307,7 @@ func loadVocab(root string, cfg *Config) error {
 	return err
 }

-func TextToContext(text string, meta *nlp.NLPInfo) []nlp.TaggedWord {
+func TextToContext(text string, meta *nlp.Info) []nlp.TaggedWord {
 	context := []nlp.TaggedWord{}

 	for idx, line := range strings.Split(text, "\n") {
2 changes: 1 addition & 1 deletion internal/nlp/http.go
@@ -20,7 +20,7 @@ type TagResult struct {
 func post(url string) ([]byte, error) {
 	var body []byte

-	resp, err := http.Post(url, "application/x-www-form-urlencoded", nil)
+	resp, err := http.Post(url, "application/x-www-form-urlencoded", nil) //nolint:gosec,noctx
 	if err != nil {
 		return body, err
 	}
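
The added `//nolint:gosec,noctx` directive is part of what gets this package past the new CI config: `gosec` flags `http.Post` with a non-constant URL, and `noctx` flags requests sent without a `context.Context`. For contrast, a hypothetical context-aware variant (a sketch, not what this commit does; `gosec`'s variable-URL warning would still apply) could look like:

```go
package nlp

import (
	"context"
	"io"
	"net/http"
)

// postWithContext is a hypothetical alternative to post(), not part of this
// commit: it threads a context.Context through the request, which would
// satisfy noctx without a directive.
func postWithContext(ctx context.Context, url string) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	return io.ReadAll(resp.Body)
}
```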
2 changes: 1 addition & 1 deletion internal/nlp/prose.go
@@ -52,7 +52,7 @@ func textToWords(text string, nlp bool) []string {
 }

 // TextToTokens converts a string to a slice of tokens.
-func TextToTokens(text string, nlp *NLPInfo) []tag.Token {
+func TextToTokens(text string, nlp *Info) []tag.Token {
 	// Determine if (and how) we need to do POS tagging.
 	if nlp == nil || nlp.Endpoint == "" {
 		// Fall back to our internal library (English-only).
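
So callers that pass a nil `*Info`, or one without an `Endpoint`, never touch the network. A minimal sketch of that fallback, written as package-internal code since `internal/nlp` can't be imported from outside the module (the helper is illustrative, not part of this commit):

```go
package nlp

import "fmt"

// tagOffline is an illustrative helper showing the fallback path: with a nil
// *Info, or one whose Endpoint is empty, TextToTokens uses the bundled
// English-only tagger.
func tagOffline(text string) {
	for _, tok := range TextToTokens(text, nil) {
		// Field names follow the tag.Token type in the signature above;
		// the exact tags depend on the internal model.
		fmt.Println(tok.Text, tok.Tag)
	}
}
```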
10 changes: 5 additions & 5 deletions internal/nlp/provider.go
@@ -35,7 +35,7 @@ func NewBlockWithParent(ctx, txt, sel, parent string) Block {
 }

 // NewLinedBlock creates a Block with an already-known location.
-func NewLinedBlock(ctx, txt, sel string, line int, nlp *NLPInfo) Block {
+func NewLinedBlock(ctx, txt, sel string, line int, _ *Info) Block {
 	if ctx == "" {
 		ctx = txt
 	}
@@ -48,11 +48,11 @@ func NewLinedBlock(ctx, txt, sel string, line int, nlp *NLPInfo) Block {
 		Line: line}
 }

-// NLPInfo handles NLP-related tasks.
+// Info handles NLP-related tasks.
 //
 // Assigning this on a per-file basis allows us to handle multi-language
 // projects -- one file might be `en` while another is `ja`, for example.
-type NLPInfo struct {
+type Info struct {
 	Lang     string // Language of the file.
 	Endpoint string // API endpoint (optional); TODO: should this be per-file?
 	Scope    string // The file's ext scope.
@@ -67,7 +67,7 @@ type NLPInfo struct {
 // The default implementation is the pure-Go prose library, but the goal is to
 // allow (fairly) seamless integration with non-Go libraries too (such as
 // spaCy).
-func (n *NLPInfo) Compute(block *Block) ([]Block, error) {
+func (n *Info) Compute(block *Block) ([]Block, error) {
 	seg := SentenceTokenizer.Tokenize
 	if n.Endpoint != "" && n.Lang != "en" {
 		// We only use external segmentation for non-English text since prose
@@ -83,7 +83,7 @@ func (n *NLPInfo) Compute(block *Block) ([]Block, error) {
 	return n.doNLP(block, seg)
 }

-func (n *NLPInfo) doNLP(blk *Block, seg segmenter) ([]Block, error) {
+func (n *Info) doNLP(blk *Block, seg segmenter) ([]Block, error) {
 	blks := []Block{}

 	ctx := blk.Context
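
Besides removing the `nlp.NLPInfo` stutter at call sites, the rename leaves the per-file design intact. A sketch of that flow, assuming only the signatures visible in this diff (the helper, endpoint URL, and selector are illustrative):

```go
package nlp

// tagJapaneseFile is an illustrative helper built only from the signatures
// shown above. Each file carries its own Info, so this file can be `ja` and
// use an external endpoint while a sibling file is `en` with no endpoint.
func tagJapaneseFile(txt string) ([]Block, error) {
	info := &Info{
		Lang:     "ja",
		Endpoint: "http://127.0.0.1:8000", // assumed local NLP server
	}
	blk := NewLinedBlock("", txt, "text", 1, nil)
	return info.Compute(&blk)
}
```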
34 changes: 17 additions & 17 deletions internal/nlp/tokenize.go
@@ -13,8 +13,8 @@ type Tokenizer interface {
 	Tokenize(string) []string
 }

-// iterTokenizer splits a sentence into words.
-type iterTokenizer struct {
+// IterTokenizer splits a sentence into words.
+type IterTokenizer struct {
 	specialRE    *regexp.Regexp
 	sanitizer    *strings.Replacer
 	contractions []string
@@ -25,67 +25,67 @@ type iterTokenizer struct {
 	isUnsplittable TokenTester
 }

-type TokenizerOptFunc func(*iterTokenizer)
+type TokenizerOptFunc func(*IterTokenizer)

 // UsingIsUnsplittable gives a function that tests whether a token is splittable or not.
 func UsingIsUnsplittable(x TokenTester) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.isUnsplittable = x
 	}
 }

 // UsingSpecialRE sets the provided special regex for unsplittable tokens.
 func UsingSpecialRE(x *regexp.Regexp) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.specialRE = x
 	}
 }

 // UsingSanitizer sets the provided sanitizer.
 func UsingSanitizer(x *strings.Replacer) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.sanitizer = x
 	}
 }

 // UsingSuffixes sets the provided suffixes.
 func UsingSuffixes(x []string) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.suffixes = x
 	}
 }

 // UsingPrefixes sets the provided prefixes.
 func UsingPrefixes(x []string) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.prefixes = x
 	}
 }

 // UsingEmoticons sets the provided map of emoticons.
 func UsingEmoticons(x map[string]int) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.emoticons = x
 	}
 }

 // UsingContractions sets the provided contractions.
 func UsingContractions(x []string) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.contractions = x
 	}
 }

 // UsingSplitCases sets the provided splitCases.
 func UsingSplitCases(x []string) TokenizerOptFunc {
-	return func(tokenizer *iterTokenizer) {
+	return func(tokenizer *IterTokenizer) {
 		tokenizer.splitCases = x
 	}
 }

 // NewIterTokenizer creates a new iterTokenizer.
-func NewIterTokenizer(opts ...TokenizerOptFunc) *iterTokenizer {
-	tok := new(iterTokenizer)
+func NewIterTokenizer(opts ...TokenizerOptFunc) *IterTokenizer {
+	tok := new(IterTokenizer)

 	// Set default parameters
 	tok.emoticons = emoticons
@@ -110,12 +110,12 @@ func addToken(s string, toks []string) []string {
 	return toks
 }

-func (t *iterTokenizer) isSpecial(token string) bool {
+func (t *IterTokenizer) isSpecial(token string) bool {
 	_, found := t.emoticons[token]
 	return found || t.specialRE.MatchString(token) || t.isUnsplittable(token)
 }

-func (t *iterTokenizer) doSplit(token string) []string {
+func (t *IterTokenizer) doSplit(token string) []string {
 	var tokens []string

 	last := 0
@@ -151,7 +151,7 @@ func (t *iterTokenizer) doSplit(token string) []string {
 }

 // Tokenize splits a sentence into a slice of words.
-func (t *iterTokenizer) Tokenize(text string) []string {
+func (t *IterTokenizer) Tokenize(text string) []string {
 	var tokens []string

 	clean, white := t.sanitizer.Replace(text), false
@@ -172,7 +172,7 @@ func (t *iterTokenizer) Tokenize(text string) []string {
 		if toks, found := cache[span]; found {
 			tokens = append(tokens, toks...)
 		} else {
-			toks := t.doSplit(span)
+			toks = t.doSplit(span)
 			cache[span] = toks
 			tokens = append(tokens, toks...)
 		}
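
With `iterTokenizer` exported as `IterTokenizer`, the functional-options constructor is now usable outside the package. A usage sketch (the helper and option values are illustrative, not Vale's defaults):

```go
package nlp

import (
	"regexp"
	"strings"
)

// newCustomTokenizer is illustrative, not part of this commit: each Using*
// helper returns a TokenizerOptFunc, and NewIterTokenizer applies them over
// its defaults.
func newCustomTokenizer() *IterTokenizer {
	return NewIterTokenizer(
		UsingSanitizer(strings.NewReplacer("\u2019", "'")),        // normalize curly apostrophes
		UsingSuffixes([]string{",", ".", "!", "?", ")"}),          // peel these off token ends
		UsingSpecialRE(regexp.MustCompile(`^[A-Z]\.([A-Z]\.)+$`)), // keep acronyms like "U.S." whole
	)
}

// Usage: tokens := newCustomTokenizer().Tokenize("Vale runs in the U.S., too!")
```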
