-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEATURE] Support batch ingestion in TextEmbeddingProcessor & SparseE…
…ncodingProcessor (#744) (#762) * Support batch ingestion in TextEmbeddingProcess & SparseEncodingProcessor Signed-off-by: Liyun Xiu <[email protected]> * Update Changelog Signed-off-by: Liyun Xiu <[email protected]> * Add UT and IT Signed-off-by: Liyun Xiu <[email protected]> * Add comments Signed-off-by: Liyun Xiu <[email protected]> * Sort texts by length before sending for inference Signed-off-by: Liyun Xiu <[email protected]> * Make consistent check for inferenceList Signed-off-by: Liyun Xiu <[email protected]> --------- Signed-off-by: Liyun Xiu <[email protected]> (cherry picked from commit afd1215)
- Loading branch information
Showing
13 changed files
with
642 additions
and
72 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
src/test/java/org/opensearch/neuralsearch/processor/InferenceProcessorTestCase.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.processor; | ||
|
||
import com.google.common.collect.ImmutableList; | ||
import org.opensearch.ingest.IngestDocument; | ||
import org.opensearch.ingest.IngestDocumentWrapper; | ||
import org.opensearch.test.OpenSearchTestCase; | ||
|
||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
public class InferenceProcessorTestCase extends OpenSearchTestCase { | ||
|
||
protected List<IngestDocumentWrapper> createIngestDocumentWrappers(int count) { | ||
List<IngestDocumentWrapper> wrapperList = new ArrayList<>(); | ||
for (int i = 0; i < count; ++i) { | ||
Map<String, Object> sourceAndMetadata = new HashMap<>(); | ||
sourceAndMetadata.put("key1", "value1"); | ||
wrapperList.add(new IngestDocumentWrapper(i, new IngestDocument(sourceAndMetadata, new HashMap<>()), null)); | ||
} | ||
return wrapperList; | ||
} | ||
|
||
protected List<List<Float>> createMockVectorWithLength(int size) { | ||
float suffix = .234f; | ||
List<List<Float>> result = new ArrayList<>(); | ||
for (int i = 0; i < size * 2;) { | ||
List<Float> number = new ArrayList<>(); | ||
number.add(i++ + suffix); | ||
number.add(i++ + suffix); | ||
result.add(number); | ||
} | ||
return result; | ||
} | ||
|
||
protected List<List<Float>> createMockVectorResult() { | ||
List<List<Float>> modelTensorList = new ArrayList<>(); | ||
List<Float> number1 = ImmutableList.of(1.234f, 2.354f); | ||
List<Float> number2 = ImmutableList.of(3.234f, 4.354f); | ||
List<Float> number3 = ImmutableList.of(5.234f, 6.354f); | ||
List<Float> number4 = ImmutableList.of(7.234f, 8.354f); | ||
List<Float> number5 = ImmutableList.of(9.234f, 10.354f); | ||
List<Float> number6 = ImmutableList.of(11.234f, 12.354f); | ||
List<Float> number7 = ImmutableList.of(13.234f, 14.354f); | ||
modelTensorList.add(number1); | ||
modelTensorList.add(number2); | ||
modelTensorList.add(number3); | ||
modelTensorList.add(number4); | ||
modelTensorList.add(number5); | ||
modelTensorList.add(number6); | ||
modelTensorList.add(number7); | ||
return modelTensorList; | ||
} | ||
} |
Oops, something went wrong.