Get rid of inefficient Stream.count() #12975

Merged: 1 commit, Dec 28, 2023
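The core change: Stream.count() is a terminal operation that, for a source like Files.list(Path), generally has to walk every entry before it can return, so using it only to test whether a directory is empty does needless work; findAny() short-circuits after the first element. The sketch below only illustrates that difference and is not code from this PR; the class and method names are invented.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

// Hypothetical helper, not part of this PR: contrasts the old and new emptiness checks.
class EmptyDirCheck {
  // Old style: count() visits every directory entry before the comparison runs.
  static boolean hasEntriesViaCount(Path dir) throws IOException {
    try (Stream<Path> files = Files.list(dir)) {
      return files.count() > 0;
    }
  }

  // New style: findAny() stops as soon as a single entry is seen.
  static boolean hasEntriesViaFindAny(Path dir) throws IOException {
    try (Stream<Path> files = Files.list(dir)) {
      return files.findAny().isPresent();
    }
  }
}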
@@ -34,8 +34,8 @@
 * Title, Date, Dateline, Body
 */
 public class ExtractReuters {
-private Path reutersDir;
-private Path outputDir;
+private final Path reutersDir;
+private final Path outputDir;

 public ExtractReuters(Path reutersDir, Path outputDir) throws IOException {
 this.reutersDir = reutersDir;
@@ -45,8 +45,8 @@ public ExtractReuters(Path reutersDir, Path outputDir) throws IOException {
 public void extract() throws IOException {
 long count = 0;
 Files.createDirectories(outputDir);
-try(Stream<Path> files = Files.list(outputDir)) {
-if (files.count() > 0) {
+try (Stream<Path> files = Files.list(outputDir)) {
+if (files.findAny().isPresent()) {
 throw new IOException("The output directory must be empty: " + outputDir);
 }
 }
Expand All @@ -65,9 +65,9 @@ public void extract() throws IOException {
Pattern EXTRACTION_PATTERN =
Pattern.compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");

private static String[] META_CHARS = {"&", "<", ">", "\"", "'"};
private static final String[] META_CHARS = {"&", "<", ">", "\"", "'"};

private static String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};
private static final String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};

/** Override if you wish to change what is extracted */
protected void extractFile(Path sgmFile) throws IOException {
@@ -80,7 +80,7 @@ protected void extractFile(Path sgmFile) throws IOException {
 while ((line = reader.readLine()) != null) {
 // when we see a closing reuters tag, flush the file

-if (line.indexOf("</REUTERS") == -1) {
+if (line.contains("</REUTERS") == false) {
 // Replace the SGM escape sequences

 buffer.append(line).append(' '); // accumulate the strings for now,

@@ -86,8 +86,8 @@ public void reset() {
 }

 /**
-* Gathers up merged input positions into a single output position, only for the current
-* "frontier" of nodes we've seen but can't yet output because they are not frozen.
+* Gathers merged input positions into a single output position, only for the current "frontier"
+* of nodes we've seen but can't yet output because they are not frozen.
 */
 private static final class OutputNode implements RollingBuffer.Resettable {
 private final List<Integer> inputNodes = new ArrayList<>();
@@ -115,15 +115,15 @@ public void reset() {
 }

 private final RollingBuffer<InputNode> inputNodes =
-new RollingBuffer<InputNode>() {
+new RollingBuffer<>() {
 @Override
 protected InputNode newInstance() {
 return new InputNode();
 }
 };

 private final RollingBuffer<OutputNode> outputNodes =
-new RollingBuffer<OutputNode>() {
+new RollingBuffer<>() {
 @Override
 protected OutputNode newInstance() {
 return new OutputNode();
@@ -193,10 +193,10 @@ private boolean releaseBufferedToken() {
 + " vs output.inputNodes.size()="
 + output.inputNodes.size();
 InputNode inputNode = inputNodes.get(output.inputNodes.get(output.nextOut));
-if (done && inputNode.tokens.size() == 0 && outputFrom >= outputNodes.getMaxPos()) {
+if (done && inputNode.tokens.isEmpty() && outputFrom >= outputNodes.getMaxPos()) {
 return false;
 }
-if (inputNode.tokens.size() == 0) {
+if (inputNode.tokens.isEmpty()) {
 assert inputNode.nextOut == 0;
 // Hole dest nodes should never be merged since 1) we always
 // assign them to a new output position, and 2) since they never
@@ -210,7 +210,7 @@ private boolean releaseBufferedToken() {
 continue;
 }
 }
-// Don't free from a hole src. Since no edge leaves here book keeping may be incorrect.
+// Don't free from a hole src. Since no edge leaves here bookkeeping may be incorrect.
 // Later output nodes may point to earlier input nodes. So we don't want to free them yet.
 freeBefore(output);
 continue;
@@ -271,7 +271,7 @@ private boolean releaseBufferedToken() {
 * @param output target output node
 */
 private void freeBefore(OutputNode output) {
-/* We've released all of the tokens that end at the current output, so free all output nodes before this.
+/* We've released all the tokens that end at the current output, so free all output nodes before this.
 Input nodes are more complex. The second shingled tokens with alternate paths can appear later in the output graph
 than some of their alternate path tokens. Because of this case we can only free from the minimum because
 the minimum node will have come from before the second shingled token.
@@ -283,7 +283,7 @@ private void freeBefore(OutputNode output) {
 int freeBefore = Collections.min(output.inputNodes);
 // This will catch a node being freed early if it is input to the next output.
 // Could a freed early node be input to a later output?
-assert outputNodes.get(outputFrom).inputNodes.stream().filter(n -> freeBefore > n).count() == 0
+assert outputNodes.get(outputFrom).inputNodes.stream().noneMatch(n -> freeBefore > n)
 : "FreeBefore " + freeBefore + " will free in use nodes";
 inputNodes.freeBefore(freeBefore);
 outputNodes.freeBefore(outputFrom);
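The last hunk applies the same idea to the assertion in freeBefore: filter(...).count() == 0 materializes a full count just to learn whether anything matched, while noneMatch(...) stops at the first counter-example. A small standalone illustration, with invented sample values and names rather than the actual Lucene code:

import java.util.List;

class NoneMatchDemo {
  public static void main(String[] args) {
    // Invented sample data standing in for output.inputNodes.
    List<Integer> inputNodes = List.of(3, 5, 7, 9);
    int freeBefore = 4;

    // Counts every matching node even though only existence matters.
    boolean hasEarlierNodeViaCount =
        inputNodes.stream().filter(n -> freeBefore > n).count() != 0;

    // Short-circuits on the first node that is smaller than freeBefore.
    boolean hasEarlierNodeViaNoneMatch = !inputNodes.stream().noneMatch(n -> freeBefore > n);

    System.out.println(hasEarlierNodeViaCount);     // true
    System.out.println(hasEarlierNodeViaNoneMatch); // true
  }
}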