From e000df3b723c99879c4234fe2f614ab2321b3b04 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Tue, 10 Dec 2024 22:17:49 +0300 Subject: [PATCH 1/4] feat(#129): stanfordnlp --- pom.xml | 12 ++- .../java/org/eolang/lints/ProgramLints.java | 30 ++------ .../eolang/lints/misc/UnitTestIsNotVerb.java | 76 ++++++++++--------- .../lints/misc/UnitTestIsNotVerbTest.java | 2 +- .../bad-tests.eo | 8 ++ .../good-tests.eo | 8 -- 6 files changed, 65 insertions(+), 71 deletions(-) diff --git a/pom.xml b/pom.xml index e4ad6bdc..75d42bcd 100644 --- a/pom.xml +++ b/pom.xml @@ -109,9 +109,15 @@ SOFTWARE. - org.apache.opennlp - opennlp-tools - 2.2.0 + edu.stanford.nlp + stanford-corenlp + 4.5.7 + + + edu.stanford.nlp + stanford-corenlp + 4.5.7 + models org.yaml diff --git a/src/main/java/org/eolang/lints/ProgramLints.java b/src/main/java/org/eolang/lints/ProgramLints.java index 345d80c7..180434ef 100644 --- a/src/main/java/org/eolang/lints/ProgramLints.java +++ b/src/main/java/org/eolang/lints/ProgramLints.java @@ -24,8 +24,6 @@ package org.eolang.lints; import com.jcabi.xml.XML; -import java.io.IOException; -import java.net.URISyntaxException; import java.util.Arrays; import org.cactoos.Scalar; import org.cactoos.iterable.Joined; @@ -41,26 +39,14 @@ public final class ProgramLints implements Scalar>> { @Override public Iterable> value() { - try { - return new Sticky<>( - new Joined>( - new XslLints(), - Arrays.asList( - new AsciiOnly(), - new UnitTestIsNotVerb() - ) + return new Sticky<>( + new Joined>( + new XslLints(), + Arrays.asList( + new AsciiOnly(), + new UnitTestIsNotVerb() ) - ); - } catch (final IOException exception) { - throw new IllegalStateException( - "Failed to allocate lints", - exception - ); - } catch (final URISyntaxException exception) { - throw new IllegalStateException( - "URI syntax is broken", - exception - ); - } + ) + ); } } diff --git a/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java b/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java 
index f3fcf46c..970c9aea 100644 --- a/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java +++ b/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java @@ -24,19 +24,19 @@ package org.eolang.lints.misc; import com.jcabi.xml.XML; +import edu.stanford.nlp.ling.CoreAnnotations; +import edu.stanford.nlp.pipeline.CoreDocument; +import edu.stanford.nlp.pipeline.StanfordCoreNLP; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.util.Arrays; import java.util.Collection; import java.util.LinkedList; import java.util.Locale; +import java.util.Properties; import java.util.regex.Pattern; +import java.util.stream.Collectors; import java.util.stream.Stream; -import opennlp.tools.postag.POSModel; -import opennlp.tools.postag.POSTaggerME; import org.cactoos.io.ResourceOf; -import org.cactoos.list.ListOf; import org.cactoos.text.TextOf; import org.eolang.lints.Defect; import org.eolang.lints.Lint; @@ -60,42 +60,31 @@ public final class UnitTestIsNotVerb implements Lint { private static final Pattern KEBAB = Pattern.compile("-"); /** - * The Open NLP tagger. + * NLP pipeline. */ - private final POSTaggerME model; + private final StanfordCoreNLP pipeline; /** * Ctor. - * @throws IOException if something went wrong. + * @param props Pipeline properties */ - public UnitTestIsNotVerb() throws IOException, URISyntaxException { - this("https://opennlp.sourceforge.net/models-1.5/en-pos-perceptron.bin"); + public UnitTestIsNotVerb(final Properties props) { + this(new StanfordCoreNLP(props)); } /** * Ctor. - * @param url Model URL - * @throws IOException if something went wrong. */ - public UnitTestIsNotVerb(final String url) throws IOException, URISyntaxException { - this(new POSModel(new URI(url).toURL())); + public UnitTestIsNotVerb() { + this(defaults()); } /** - * Ctor. - * @param pos POS model. - */ - public UnitTestIsNotVerb(final POSModel pos) { - this(new POSTaggerME(pos)); - } - - /** - * Ctor. 
- * - * @param mdl The Open NLP tagger. + * Primary ctor. + * @param pipe NLP pipeline */ - public UnitTestIsNotVerb(final POSTaggerME mdl) { - this.model = mdl; + public UnitTestIsNotVerb(final StanfordCoreNLP pipe) { + this.pipeline = pipe; } @Override @@ -103,17 +92,20 @@ public Collection defects(final XML xmir) throws IOException { final Collection defects = new LinkedList<>(); for (final XML object : xmir.nodes("/program[metas/meta[head='tests']]/objects/o[@name]")) { final String name = object.xpath("@name").get(0); - final String first = new ListOf<>( - this.model.tag( - Stream - .concat( - Stream.of("It"), - Arrays.stream(UnitTestIsNotVerb.KEBAB.split(name)) - ).map(s -> s.toLowerCase(Locale.ROOT)) - .toArray(String[]::new) + final CoreDocument doc = new CoreDocument( + Stream + .concat( + Stream.of("It"), + Arrays.stream(UnitTestIsNotVerb.KEBAB.split(name)) + ).map(s -> s.toLowerCase(Locale.ROOT)) + .collect(Collectors.joining(" ")) + ); + this.pipeline.annotate(doc); + if ( + !"VBZ".equals( + doc.tokens().get(1).get(CoreAnnotations.PartOfSpeechAnnotation.class) ) - ).get(1); - if (!("VB".equals(first) || "VBP".equals(first) || "VBZ".equals(first))) { + ) { defects.add( new Defect.Default( "unit-test-is-not-verb", @@ -139,4 +131,14 @@ public String motive() throws Exception { ) ).asString(); } + + /** + * Prestructor for default properties. + * @return Properties. 
+ */ + private static Properties defaults() { + final Properties props = new Properties(); + props.setProperty("annotators", "tokenize,pos"); + return props; + } } diff --git a/src/test/java/org/eolang/lints/misc/UnitTestIsNotVerbTest.java b/src/test/java/org/eolang/lints/misc/UnitTestIsNotVerbTest.java index 4670ecc2..531cb361 100644 --- a/src/test/java/org/eolang/lints/misc/UnitTestIsNotVerbTest.java +++ b/src/test/java/org/eolang/lints/misc/UnitTestIsNotVerbTest.java @@ -66,7 +66,7 @@ void catchesBadName() throws Exception { ) ).parsed() ), - Matchers.hasSize(38) + Matchers.hasSize(40) ); } diff --git a/src/test/resources/org/eolang/lints/misc/test-object-is-not-verb-in-singular/bad-tests.eo b/src/test/resources/org/eolang/lints/misc/test-object-is-not-verb-in-singular/bad-tests.eo index f58ae654..01175755 100644 --- a/src/test/resources/org/eolang/lints/misc/test-object-is-not-verb-in-singular/bad-tests.eo +++ b/src/test/resources/org/eolang/lints/misc/test-object-is-not-verb-in-singular/bad-tests.eo @@ -173,3 +173,11 @@ # Test. [] > chicken-as-expected 42 > @ + +# Test +[] > please-reboot + 42 > @ + +# Test. +[] > hope-it-works + 42 > @ diff --git a/src/test/resources/org/eolang/lints/misc/test-object-is-not-verb-in-singular/good-tests.eo b/src/test/resources/org/eolang/lints/misc/test-object-is-not-verb-in-singular/good-tests.eo index 90909b82..d640754e 100644 --- a/src/test/resources/org/eolang/lints/misc/test-object-is-not-verb-in-singular/good-tests.eo +++ b/src/test/resources/org/eolang/lints/misc/test-object-is-not-verb-in-singular/good-tests.eo @@ -113,11 +113,3 @@ # Test. [] > is-almost-correct 42 > @ - -# Test -[] > please-reboot - 42 > @ - -# Test. 
-[] > hope-it-works - 42 > @ From 3e1927e8da251e43ea68a3db17102c947f208559 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Tue, 10 Dec 2024 22:22:05 +0300 Subject: [PATCH 2/4] feat(#129): no puzzle --- src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java b/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java index 970c9aea..1bda2e59 100644 --- a/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java +++ b/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java @@ -46,11 +46,6 @@ * Lint that checks test object name is a verb in singular. * * @since 0.0.22 - * @todo #72:60min Configure maven to download model file during the build and place into the JAR. - * Currently, we download model file each time when creating the lint, which may - * be slow in the usage of this lint. Instead, let's configure maven to download - * model file during the build, and place into JAR, so lint will be able to locate - * file from resources faster. 
*/ public final class UnitTestIsNotVerb implements Lint { From 67a3c3d41902c32a7954852253da08fdd6f7ee08 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Wed, 11 Dec 2024 13:48:04 +0300 Subject: [PATCH 3/4] feat(#129): dots formatting --- src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java b/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java index 1bda2e59..a44bfe74 100644 --- a/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java +++ b/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java @@ -92,7 +92,8 @@ public Collection defects(final XML xmir) throws IOException { .concat( Stream.of("It"), Arrays.stream(UnitTestIsNotVerb.KEBAB.split(name)) - ).map(s -> s.toLowerCase(Locale.ROOT)) + ) + .map(s -> s.toLowerCase(Locale.ROOT)) .collect(Collectors.joining(" ")) ); this.pipeline.annotate(doc); From 083856ac60b2ef3988d3f1828154a8afebf2e928 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Wed, 11 Dec 2024 14:15:19 +0300 Subject: [PATCH 4/4] feat(#129): docs --- .../org/eolang/lints/misc/UnitTestIsNotVerb.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java b/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java index a44bfe74..595ef308 100644 --- a/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java +++ b/src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java @@ -44,8 +44,20 @@ /** * Lint that checks test object name is a verb in singular. - * + * This lint uses Stanford CoreNLP model + * with POS tagging capabilities in order to determine the part of speech and + * tense for test object name. 
Originally, we used OpenNLP + * library to do that, but switched to Stanford CoreNLP, because OpenNLP 2.4.0+ merges all + * verb tags into a single `VERB` POS tag, which sacrifices information + * about verb tenses that is important for us. You can read more + * about the reasons for this here + * and here. * @since 0.0.22 + * @todo #129:60min Library stanford-corenlp-4.5.7-models.jar is too large. + * Currently, the JAR takes ~452 MB, which may cause trouble for the users of + * the lints library. Let's think about what we can do about this. We should check whether + * it is possible to get rid of this dependency and download the models from another + * source. */ public final class UnitTestIsNotVerb implements Lint {