From 71c10128baae9a0f2ba5dc9208b9614da3722d17 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Wed, 11 Dec 2024 21:09:01 +0000 Subject: [PATCH] fix the tests --- .../chunking/WordBoundaryChunker.java | 4 ---- .../chunking/SentenceBoundaryChunkerTests.java | 16 ++++++++-------- .../chunking/WordBoundaryChunkerTests.java | 18 +++++++----------- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunker.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunker.java index b15e2134f4cf7..1ce90a9e416e5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunker.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunker.java @@ -96,10 +96,6 @@ List chunkPositions(String input, int chunkSize, int overlap) { throw new IllegalArgumentException("Invalid chunking parameters, overlap [" + overlap + "] must be >= 0"); } - if (input.isEmpty()) { - return List.of(); - } - var chunkPositions = new ArrayList(); // This position in the chunk is where the next overlapping chunk will start diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkerTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkerTests.java index 357f5ddfb1501..f81894ccd4bbb 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkerTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkerTests.java @@ -44,37 +44,37 @@ private List textChunks( } public void testEmptyString() { - var chunks = new SentenceBoundaryChunker().chunk("", 100, randomBoolean()); + var chunks = textChunks(new SentenceBoundaryChunker(), "", 100, randomBoolean()); assertThat(chunks, hasSize(1)); assertThat(chunks.get(0), Matchers.is("")); } public void testBlankString() { - var chunks = new SentenceBoundaryChunker().chunk(" ", 100, randomBoolean()); + var chunks = textChunks(new SentenceBoundaryChunker(), " ", 100, randomBoolean()); assertThat(chunks, hasSize(1)); assertThat(chunks.get(0), Matchers.is(" ")); } public void testSingleChar() { - var chunks = new SentenceBoundaryChunker().chunk(" b", 100, randomBoolean()); + var chunks = textChunks(new SentenceBoundaryChunker(), " b", 100, randomBoolean()); assertThat(chunks, Matchers.contains(" b")); - chunks = new SentenceBoundaryChunker().chunk("b", 100, randomBoolean()); + chunks = textChunks(new SentenceBoundaryChunker(), "b", 100, randomBoolean()); assertThat(chunks, Matchers.contains("b")); - chunks = new SentenceBoundaryChunker().chunk(". ", 100, randomBoolean()); + chunks = textChunks(new SentenceBoundaryChunker(), ". ", 100, randomBoolean()); assertThat(chunks, Matchers.contains(". ")); - chunks = new SentenceBoundaryChunker().chunk(" , ", 100, randomBoolean()); + chunks = textChunks(new SentenceBoundaryChunker(), " , ", 100, randomBoolean()); assertThat(chunks, Matchers.contains(" , ")); - chunks = new SentenceBoundaryChunker().chunk(" ,", 100, randomBoolean()); + chunks = textChunks(new SentenceBoundaryChunker(), " ,", 100, randomBoolean()); assertThat(chunks, Matchers.contains(" ,")); } public void testSingleCharRepeated() { var input = "a".repeat(32_000); - var chunks = new SentenceBoundaryChunker().chunk(input, 100, randomBoolean()); + var chunks = textChunks(new SentenceBoundaryChunker(), input, 100, randomBoolean()); assertThat(chunks, Matchers.contains(input)); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkerTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkerTests.java index 6f8c0404fd9a9..b4fa5c9122258 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkerTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkerTests.java @@ -72,10 +72,6 @@ public class WordBoundaryChunkerTests extends ESTestCase { * Use the chunk functions that return offsets where possible */ List textChunks(WordBoundaryChunker chunker, String input, int chunkSize, int overlap) { - if (input.isEmpty()) { - return List.of(""); - } - var chunkPositions = chunker.chunk(input, chunkSize, overlap); return chunkPositions.stream().map(p -> input.substring(p.start(), p.end())).collect(Collectors.toList()); } @@ -242,31 +238,31 @@ public void testWhitespace() { } public void testBlankString() { - var chunks = new WordBoundaryChunker().chunk(" ", 100, 10); + var chunks = textChunks(new WordBoundaryChunker(), " ", 100, 10); assertThat(chunks, hasSize(1)); assertThat(chunks.get(0), Matchers.is(" ")); } public void testSingleChar() { - var chunks = new WordBoundaryChunker().chunk(" b", 100, 10); + var chunks = textChunks(new WordBoundaryChunker(), " b", 100, 10); assertThat(chunks, Matchers.contains(" b")); - chunks = new WordBoundaryChunker().chunk("b", 100, 10); + chunks = textChunks(new WordBoundaryChunker(), "b", 100, 10); assertThat(chunks, Matchers.contains("b")); - chunks = new WordBoundaryChunker().chunk(". ", 100, 10); + chunks = textChunks(new WordBoundaryChunker(), ". ", 100, 10); assertThat(chunks, Matchers.contains(". ")); - chunks = new WordBoundaryChunker().chunk(" , ", 100, 10); + chunks = textChunks(new WordBoundaryChunker(), " , ", 100, 10); assertThat(chunks, Matchers.contains(" , ")); - chunks = new WordBoundaryChunker().chunk(" ,", 100, 10); + chunks = textChunks(new WordBoundaryChunker(), " ,", 100, 10); assertThat(chunks, Matchers.contains(" ,")); } public void testSingleCharRepeated() { var input = "a".repeat(32_000); - var chunks = new WordBoundaryChunker().chunk(input, 100, 10); + var chunks = textChunks(new WordBoundaryChunker(), input, 100, 10); assertThat(chunks, Matchers.contains(input)); }