Skip to content

Commit

Permalink
Merge branch '__rultor'
Browse files Browse the repository at this point in the history
  • Loading branch information
rultor committed Dec 11, 2024
2 parents 61d88a2 + 083856a commit 82a29b9
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 77 deletions.
12 changes: 9 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,15 @@ SOFTWARE.
<!-- version from the parent pom -->
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
<version>2.2.0</version>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>4.5.7</version>
</dependency>
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>4.5.7</version>
<classifier>models</classifier>
</dependency>
<dependency>
<groupId>org.yaml</groupId>
Expand Down
30 changes: 8 additions & 22 deletions src/main/java/org/eolang/lints/ProgramLints.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
package org.eolang.lints;

import com.jcabi.xml.XML;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Arrays;
import org.cactoos.Scalar;
import org.cactoos.iterable.Joined;
Expand All @@ -42,26 +40,14 @@ final class ProgramLints implements Scalar<Iterable<Lint<XML>>> {

@Override
public Iterable<Lint<XML>> value() {
try {
return new Sticky<>(
new Joined<Lint<XML>>(
new XslLints(),
Arrays.asList(
new AsciiOnly(),
new UnitTestIsNotVerb()
)
return new Sticky<>(
new Joined<Lint<XML>>(
new XslLints(),
Arrays.asList(
new AsciiOnly(),
new UnitTestIsNotVerb()
)
);
} catch (final IOException exception) {
throw new IllegalStateException(
"Failed to allocate lints",
exception
);
} catch (final URISyntaxException exception) {
throw new IllegalStateException(
"URI syntax is broken",
exception
);
}
)
);
}
}
96 changes: 53 additions & 43 deletions src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,40 @@
package org.eolang.lints.misc;

import com.jcabi.xml.XML;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import org.cactoos.io.ResourceOf;
import org.cactoos.list.ListOf;
import org.cactoos.text.TextOf;
import org.eolang.lints.Defect;
import org.eolang.lints.Lint;
import org.eolang.lints.Severity;

/**
* Lint that checks that a test object name is a verb in singular form.
*
* This lint uses the <a href="https://stanfordnlp.github.io/CoreNLP/">Stanford CoreNLP model</a>
* with POS tagging capabilities to determine the part of speech and the
* tense of the test object name. Originally, we used the <a href="https://opennlp.apache.org/">OpenNLP</a>
* library for that, but switched to Stanford CoreNLP because OpenNLP 2.4.0+
* merges all verb tags into a single `VERB` POS tag, which discards the
* information about verb tenses that is important for us. You can read more
* about the reasons for this <a href="https://github.com/objectionary/lints/issues/129">here</a>
* and <a href="https://github.com/objectionary/lints/pull/126#issuecomment-2531121073">here</a>.
* @since 0.0.22
* @todo #72:60min Configure Maven to download the model file during the build and place it into the JAR.
* Currently, we download the model file each time the lint is created, which
* may make this lint slow to use. Instead, let's configure Maven to download
* the model file during the build and place it into the JAR, so that the lint
* can locate the file in resources faster.
* @todo #129:60min The stanford-corenlp-4.5.7-models.jar library is too large.
* Currently, the JAR takes ~452 MB, which may cause trouble for the users of
* the lints library. Let's think about what we can do about this. We should
* check whether it is possible to get rid of this dependency and download
* the models from another source.
*/
public final class UnitTestIsNotVerb implements Lint<XML> {

Expand All @@ -60,60 +67,53 @@ public final class UnitTestIsNotVerb implements Lint<XML> {
private static final Pattern KEBAB = Pattern.compile("-");

/**
* The Open NLP tagger.
*/
private final POSTaggerME model;

/**
* Ctor.
* @throws IOException if something went wrong.
* NLP pipeline.
*/
public UnitTestIsNotVerb() throws IOException, URISyntaxException {
this("https://opennlp.sourceforge.net/models-1.5/en-pos-perceptron.bin");
}
private final StanfordCoreNLP pipeline;

/**
* Ctor.
* @param url Model URL
* @throws IOException if something went wrong.
* @param props Pipeline properties
*/
public UnitTestIsNotVerb(final String url) throws IOException, URISyntaxException {
this(new POSModel(new URI(url).toURL()));
public UnitTestIsNotVerb(final Properties props) {
this(new StanfordCoreNLP(props));
}

/**
* Ctor.
* @param pos POS model.
*/
public UnitTestIsNotVerb(final POSModel pos) {
this(new POSTaggerME(pos));
public UnitTestIsNotVerb() {
this(defaults());
}

/**
* Ctor.
*
* @param mdl The Open NLP tagger.
* Primary ctor.
* @param pipe NLP pipeline
*/
public UnitTestIsNotVerb(final POSTaggerME mdl) {
this.model = mdl;
public UnitTestIsNotVerb(final StanfordCoreNLP pipe) {
this.pipeline = pipe;
}

@Override
public Collection<Defect> defects(final XML xmir) throws IOException {
final Collection<Defect> defects = new LinkedList<>();
for (final XML object : xmir.nodes("/program[metas/meta[head='tests']]/objects/o[@name]")) {
final String name = object.xpath("@name").get(0);
final String first = new ListOf<>(
this.model.tag(
Stream
.concat(
Stream.of("It"),
Arrays.stream(UnitTestIsNotVerb.KEBAB.split(name))
).map(s -> s.toLowerCase(Locale.ROOT))
.toArray(String[]::new)
final CoreDocument doc = new CoreDocument(
Stream
.concat(
Stream.of("It"),
Arrays.stream(UnitTestIsNotVerb.KEBAB.split(name))
)
.map(s -> s.toLowerCase(Locale.ROOT))
.collect(Collectors.joining(" "))
);
this.pipeline.annotate(doc);
if (
!"VBZ".equals(
doc.tokens().get(1).get(CoreAnnotations.PartOfSpeechAnnotation.class)
)
).get(1);
if (!("VB".equals(first) || "VBP".equals(first) || "VBZ".equals(first))) {
) {
defects.add(
new Defect.Default(
"unit-test-is-not-verb",
Expand All @@ -139,4 +139,14 @@ public String motive() throws Exception {
)
).asString();
}

/**
 * Prestructor that builds the default pipeline properties.
 * The only key set is {@code annotators}, with the value {@code tokenize,pos}.
 * @return Properties with the default annotators configured.
 */
private static Properties defaults() {
    final Properties config = new Properties();
    config.setProperty("annotators", "tokenize,pos");
    return config;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void catchesBadName() throws Exception {
)
).parsed()
),
Matchers.hasSize(38)
Matchers.hasSize(40)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,11 @@
# Test.
[] > chicken-as-expected
42 > @

# Test
[] > please-reboot
42 > @

# Test.
[] > hope-it-works
42 > @
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,3 @@
# Test.
[] > is-almost-correct
42 > @

# Test
[] > please-reboot
42 > @

# Test.
[] > hope-it-works
42 > @

2 comments on commit 82a29b9

@0pdd
Copy link

@0pdd 0pdd commented on 82a29b9 Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 72-f7af213e disappeared from src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java, that's why I closed #125. Please remember that the puzzle was not necessarily removed in this particular commit. Maybe it happened earlier, but we discovered this fact only now.

@0pdd
Copy link

@0pdd 0pdd commented on 82a29b9 Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 129-28f91348 discovered in src/main/java/org/eolang/lints/misc/UnitTestIsNotVerb.java and submitted as #135. Please remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

Please sign in to comment.