
Refactoring BaseSparseEncodingIT and enabling BWC tests
Signed-off-by: Varun Jain <[email protected]>
vibrantvarun committed Jan 24, 2024
1 parent bd8a3d2 commit 27a20b2
Showing 7 changed files with 124 additions and 151 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/backwards_compatibility_tests_workflow.yml
@@ -36,8 +36,7 @@ jobs:
- name: Run NeuralSearch Restart-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}}
run: |
echo "Running restart-upgrade backwards compatibility tests ..."
- # Disabling BWC tests due to ongoing build failure. https://github.com/opensearch-project/neural-search/issues/536
- # ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}'
+ ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}'
Rolling-Upgrade-BWCTests-NeuralSearch:
strategy:
@@ -64,5 +63,4 @@ jobs:
- name: Run NeuralSearch Rolling-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}}
run: |
echo "Running rolling-upgrade backwards compatibility tests ..."
- # Disabling BWC tests due to ongoing build failure. https://github.com/opensearch-project/neural-search/issues/536
- # ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}'
+ ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}'
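Note: with the issue #536 workaround removed, both BWC suites run in CI again. To reproduce a run locally against a concrete version, the same Gradle targets can be invoked directly, for example (2.11.0 is an illustrative version, not one pinned by this change):

    ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=2.11.0'
    ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=2.11.0'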
AbstractRestartUpgradeRestTestCase.java
@@ -8,14 +8,14 @@
import java.util.Optional;
import org.junit.Before;
import org.opensearch.common.settings.Settings;
-import org.opensearch.neuralsearch.BaseSparseEncodingIT;
+import org.opensearch.neuralsearch.BaseNeuralSearchIT;
import static org.opensearch.neuralsearch.TestUtils.CLIENT_TIMEOUT_VALUE;
import static org.opensearch.neuralsearch.TestUtils.RESTART_UPGRADE_OLD_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.BWC_VERSION;
import static org.opensearch.neuralsearch.TestUtils.NEURAL_SEARCH_BWC_PREFIX;
import org.opensearch.test.rest.OpenSearchRestTestCase;

-public abstract class AbstractRestartUpgradeRestTestCase extends BaseSparseEncodingIT {
+public abstract class AbstractRestartUpgradeRestTestCase extends BaseNeuralSearchIT {

@Before
protected String getIndexNameForTest() {
AbstractRollingUpgradeTestCase.java
@@ -8,7 +8,7 @@
import java.util.Optional;
import org.junit.Before;
import org.opensearch.common.settings.Settings;
-import org.opensearch.neuralsearch.BaseSparseEncodingIT;
+import org.opensearch.neuralsearch.BaseNeuralSearchIT;
import org.opensearch.test.rest.OpenSearchRestTestCase;
import static org.opensearch.neuralsearch.TestUtils.OLD_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.MIXED_CLUSTER;
@@ -18,7 +18,7 @@
import static org.opensearch.neuralsearch.TestUtils.BWCSUITE_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.NEURAL_SEARCH_BWC_PREFIX;

-public abstract class AbstractRollingUpgradeTestCase extends BaseSparseEncodingIT {
+public abstract class AbstractRollingUpgradeTestCase extends BaseNeuralSearchIT {

@Before
protected String getIndexNameForTest() {
SparseEncodingProcessIT.java
@@ -15,13 +15,13 @@
import org.opensearch.client.Response;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.common.xcontent.XContentType;
-import org.opensearch.neuralsearch.BaseSparseEncodingIT;
+import org.opensearch.neuralsearch.BaseNeuralSearchIT;

import com.google.common.collect.ImmutableList;

import lombok.SneakyThrows;

-public class SparseEncodingProcessIT extends BaseSparseEncodingIT {
+public class SparseEncodingProcessIT extends BaseNeuralSearchIT {

private static final String INDEX_NAME = "sparse_encoding_index";

@@ -39,7 +39,7 @@ public void tearDown() {
}

public void testSparseEncodingProcessor() throws Exception {
-        String modelId = prepareModel();
+        String modelId = prepareSparseEncodingModel();
createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.SPARSE_ENCODING);
createSparseEncodingIndex();
ingestDocument();
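Note: createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.SPARSE_ENCODING) installs an ingest pipeline from a type-specific request template in the test resources. A sparse encoding pipeline of this kind is typically shaped like the following sketch (pipeline name and field names are illustrative, not quoted from the resource file):

    PUT /_ingest/pipeline/sparse-encoding-pipeline
    {
      "processors": [
        {
          "sparse_encoding": {
            "model_id": "<id returned by prepareSparseEncodingModel()>",
            "field_map": { "passage_text": "passage_embedding" }
          }
        }
      ]
    }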
NeuralSparseQueryIT.java
@@ -4,6 +4,7 @@
*/
package org.opensearch.neuralsearch.query;

+import org.opensearch.neuralsearch.BaseNeuralSearchIT;
import static org.opensearch.neuralsearch.TestUtils.objectToFloat;

import java.util.List;
@@ -15,12 +16,11 @@
import org.opensearch.index.query.BoolQueryBuilder;
import org.opensearch.index.query.MatchAllQueryBuilder;
import org.opensearch.index.query.MatchQueryBuilder;
-import org.opensearch.neuralsearch.BaseSparseEncodingIT;
import org.opensearch.neuralsearch.TestUtils;

import lombok.SneakyThrows;

-public class NeuralSparseQueryIT extends BaseSparseEncodingIT {
+public class NeuralSparseQueryIT extends BaseNeuralSearchIT {
private static final String TEST_BASIC_INDEX_NAME = "test-sparse-basic-index";
private static final String TEST_MULTI_NEURAL_SPARSE_FIELD_INDEX_NAME = "test-sparse-multi-field-index";
private static final String TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME = "test-sparse-text-and-field-index";
BaseNeuralSearchIT.java
@@ -48,6 +48,7 @@
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil;
+import org.opensearch.neuralsearch.util.TokenWeightUtil;
import org.opensearch.test.ClusterServiceUtils;
import org.opensearch.threadpool.TestThreadPool;
import org.opensearch.threadpool.ThreadPool;
@@ -211,6 +212,21 @@ protected String prepareModel() {
return modelId;
}

+    /**
+     * Upload a default sparse encoding model and load it into the cluster
+     *
+     * @return modelID
+     */
+    @SneakyThrows
+    protected String prepareSparseEncodingModel() {
+        String requestBody = Files.readString(
+            Path.of(classLoader.getResource("processor/UploadSparseEncodingModelRequestBody.json").toURI())
+        );
+        String modelId = registerModelGroupAndUploadModel(requestBody);
+        loadModel(modelId);
+        return modelId;
+    }
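Note: the register request body is read from processor/UploadSparseEncodingModelRequestBody.json, which is not shown in this diff. A register-model body for a sparse encoding model usually carries at least the following fields (this shape is an assumption about the resource file, not a quote from it):

    {
      "name": "<model name>",
      "version": "1.0.0",
      "function_name": "SPARSE_ENCODING",
      "model_format": "TORCH_SCRIPT",
      "url": "<model artifact url>"
    }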

/**
* Execute model inference on the provided query text
*
@@ -473,6 +489,36 @@ protected void addKnnDoc(
assertEquals(request.getEndpoint() + ": failed", RestStatus.CREATED, RestStatus.fromCode(response.getStatusLine().getStatusCode()));
}

+    @SneakyThrows
+    protected void addSparseEncodingDoc(String index, String docId, List<String> fieldNames, List<Map<String, Float>> docs) {
+        addSparseEncodingDoc(index, docId, fieldNames, docs, Collections.emptyList(), Collections.emptyList());
+    }

+    @SneakyThrows
+    protected void addSparseEncodingDoc(
+        String index,
+        String docId,
+        List<String> fieldNames,
+        List<Map<String, Float>> docs,
+        List<String> textFieldNames,
+        List<String> texts
+    ) {
+        Request request = new Request("POST", "/" + index + "/_doc/" + docId + "?refresh=true");
+        XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
+        for (int i = 0; i < fieldNames.size(); i++) {
+            builder.field(fieldNames.get(i), docs.get(i));
+        }

+        for (int i = 0; i < textFieldNames.size(); i++) {
+            builder.field(textFieldNames.get(i), texts.get(i));
+        }
+        builder.endObject();

+        request.setJsonEntity(builder.toString());
+        Response response = client().performRequest(request);
+        assertEquals(request.getEndpoint() + ": failed", RestStatus.CREATED, RestStatus.fromCode(response.getStatusLine().getStatusCode()));
+    }
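Note: for a call with fieldNames = ["passage_embedding"], one token-weight map, textFieldNames = ["passage_text"], and one text value (names and weights illustrative), the builder above emits a request like:

    POST /<index>/_doc/<docId>?refresh=true
    {
      "passage_embedding": { "hello": 1.2, "world": 3.4 },
      "passage_text": "hello world"
    }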

/**
* Parse the first returned hit from a search response as a map
*
@@ -534,6 +580,19 @@ protected void prepareKnnIndex(String indexName, List<KNNFieldConfig> knnFieldCo
createIndexWithConfiguration(indexName, buildIndexConfiguration(knnFieldConfigs, numOfShards), "");
}

+    @SneakyThrows
+    protected void prepareSparseEncodingIndex(String indexName, List<String> sparseEncodingFieldNames) {
+        XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("mappings").startObject("properties");

+        for (String fieldName : sparseEncodingFieldNames) {
+            xContentBuilder.startObject(fieldName).field("type", "rank_features").endObject();
+        }

+        xContentBuilder.endObject().endObject().endObject();
+        String indexMappings = xContentBuilder.toString();
+        createIndexWithConfiguration(indexName, indexMappings, "");
+    }
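Note: a call such as prepareSparseEncodingIndex("my-index", List.of("passage_embedding")) (names illustrative) creates the index with exactly the mapping the builder above serializes:

    {
      "mappings": {
        "properties": {
          "passage_embedding": { "type": "rank_features" }
        }
      }
    }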

/**
* Computes the expected distance between an indexVector and query text without using the neural query type.
*
@@ -943,6 +1002,60 @@ protected Map<String, Object> deletePipeline(String pipelineName) {
return responseMap;
}

+    protected float computeExpectedScore(String modelId, Map<String, Float> tokenWeightMap, String queryText) {
+        Map<String, Float> queryTokens = runSparseModelInference(modelId, queryText);
+        return computeExpectedScore(tokenWeightMap, queryTokens);
+    }

+    protected float computeExpectedScore(Map<String, Float> tokenWeightMap, Map<String, Float> queryTokens) {
+        Float score = 0f;
+        for (Map.Entry<String, Float> entry : queryTokens.entrySet()) {
+            if (tokenWeightMap.containsKey(entry.getKey())) {
+                score += entry.getValue() * getFeatureFieldCompressedNumber(tokenWeightMap.get(entry.getKey()));
+            }
+        }
+        return score;
+    }
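Note: the expected score is a dot product over the tokens shared by query and document: score = sum over shared tokens t of queryWeight(t) * compress(docWeight(t)), where compress is the 16-bit FeatureField truncation implemented by getFeatureFieldCompressedNumber below. Tokens that appear only in the query or only in the document contribute nothing.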

+    @SneakyThrows
+    protected Map<String, Float> runSparseModelInference(String modelId, String queryText) {
+        Response inferenceResponse = makeRequest(
+            client(),
+            "POST",
+            String.format(LOCALE, "/_plugins/_ml/models/%s/_predict", modelId),
+            null,
+            toHttpEntity(String.format(LOCALE, "{\"text_docs\": [\"%s\"]}", queryText)),
+            ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, DEFAULT_USER_AGENT))
+        );

+        Map<String, Object> inferenceResJson = XContentHelper.convertToMap(
+            XContentType.JSON.xContent(),
+            EntityUtils.toString(inferenceResponse.getEntity()),
+            false
+        );

+        Object inference_results = inferenceResJson.get("inference_results");
+        assertTrue(inference_results instanceof List);
+        List<Object> inferenceResultsAsMap = (List<Object>) inference_results;
+        assertEquals(1, inferenceResultsAsMap.size());
+        Map<String, Object> result = (Map<String, Object>) inferenceResultsAsMap.get(0);
+        List<Object> output = (List<Object>) result.get("output");
+        assertEquals(1, output.size());
+        Map<String, Object> map = (Map<String, Object>) output.get(0);
+        assertEquals(1, map.size());
+        Map<String, Object> dataAsMap = (Map<String, Object>) map.get("dataAsMap");
+        return TokenWeightUtil.fetchListOfTokenWeightMap(List.of(dataAsMap)).get(0);
+    }
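Note: the assertions above pin down the _predict response shape this helper expects: one inference result containing one output map whose only entry is dataAsMap. Assuming TokenWeightUtil looks up the token weights under a "response" key (an assumption about that utility, which is not shown in this diff), the parsed payload resembles:

    {
      "inference_results": [
        {
          "output": [
            { "dataAsMap": { "response": [ { "hello": 1.2, "world": 3.4 } ] } }
          ]
        }
      ]
    }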

+    // rank_features uses Lucene's FeatureField, which compresses each float weight to 16 bits;
+    // this function simulates the encoding and decoding process applied by FeatureField
+    protected Float getFeatureFieldCompressedNumber(Float originNumber) {
+        int freqBits = Float.floatToIntBits(originNumber);
+        freqBits = freqBits >> 15;
+        freqBits = ((int) ((float) freqBits)) << 15;
+        return Float.intBitsToFloat(freqBits);
+    }
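Note: a worked example of the truncation: 5.3f has bits 0x40A9999A; shifting right by 15 keeps the top 17 bits (0x8153), and shifting back left yields 0x40A98000, which decodes to 5.296875f. The intermediate int-to-float-to-int cast is a no-op here, since the shifted values are far below 2^24 and therefore exactly representable as floats.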

/**
* Enumeration for types of pipeline processors, used to lookup resources like create
* processor request as those are type specific

BaseSparseEncodingIT.java
This file was deleted.
