Merge branch 'main' into reprovision

joshpalis · Jun 10, 2024 · a3bd6d1 · a3bd6d1
2 parents 773fbb8 + b535d5e
commit a3bd6d1
Show file tree

Hide file tree

Showing 32 changed files with 911 additions and 65 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -14,10 +14,9 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      # Spotless requires JDK 17+
       - uses: actions/setup-java@v4
         with:
-          java-version: 17
+          java-version: 21
           distribution: temurin
       - name: Spotless Check
         run: ./gradlew spotlessCheck
@@ -26,6 +25,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+      - uses: actions/setup-java@v4
+        with:
+          java-version: 21
+          distribution: temurin
       - name: Javadoc CheckStyle
         run: ./gradlew checkstyleMain
       - name: Javadoc Check
@@ -35,11 +38,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-13, windows-latest]
-        java: [11, 21]
-        include:
-          - os: ubuntu-latest
-            java: 17
-            codecov: yes
+        java: [21]
     name: Test JDK${{ matrix.java }}, ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     steps:
@@ -53,7 +52,7 @@ jobs:
         run: |
           ./gradlew check -x integTest -x yamlRestTest -x spotlessJava
       - name: Upload Coverage Report
-        if: ${{ matrix.codecov }}
+        if: contains(matrix.os, 'ubuntu') && contains(matrix.java, '21')
         uses: codecov/codecov-action@v4
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
@@ -65,10 +64,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-13, windows-latest]
-        java: [11, 21]
-        include:
-          - os: ubuntu-latest
-            java: 17
+        java: [21]
     name: Integ Test JDK${{ matrix.java }}, ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     steps:

diff --git a/.github/workflows/publish-snapshots.yml b/.github/workflows/publish-snapshots.yml
@@ -23,7 +23,7 @@ jobs:
       - uses: actions/setup-java@v4
         with:
           distribution: temurin # Temurin is a distribution of adoptium
-          java-version: 17
+          java-version: 21
       - uses: actions/checkout@v4
       - uses: aws-actions/configure-aws-credentials@v4.0.2
         with:

diff --git a/.github/workflows/test_bwc.yml b/.github/workflows/test_bwc.yml
@@ -11,7 +11,7 @@ jobs:
   Build-ff-linux:
     strategy:
       matrix:
-        java: [11,17,21]
+        java: [21]
       fail-fast: false
 
     name: Test Flow Framework BWC

diff --git a/.github/workflows/test_security.yml b/.github/workflows/test_security.yml
@@ -16,7 +16,7 @@ jobs:
   integ-test-with-security-linux:
     strategy:
       matrix:
-        java: [11, 17, 21]
+        java: [21]
 
     name: Run Security Integration Tests on Linux
     runs-on: ubuntu-latest

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
 ### Enhancements
 ### Bug Fixes
 ### Infrastructure
+- Set Java target compatibility to JDK 21 ([#730](https://github.com/opensearch-project/flow-framework/pull/730))
+
 ### Documentation
 ### Maintenance
 ### Refactoring
@@ -17,6 +19,9 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
 ### Enhancements
 - Add Workflow Step for Reindex from source index to destination ([#718](https://github.com/opensearch-project/flow-framework/pull/718))
 - Add param to delete workflow API to clear status even if resources exist ([#719](https://github.com/opensearch-project/flow-framework/pull/719))
+- Add additional default use cases ([#731](https://github.com/opensearch-project/flow-framework/pull/731))
+- Add conversation search default use case with RAG tool ([#732](https://github.com/opensearch-project/flow-framework/pull/732))
+
 ### Bug Fixes
 - Add user mapping to Workflow State index ([#705](https://github.com/opensearch-project/flow-framework/pull/705))
 

diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md
@@ -24,12 +24,12 @@ See [OpenSearch requirements](https://github.com/opensearch-project/OpenSearch/b
 
 #### Java
 
-Flow Framework code currently maintains compatibility with JDK 11. Other plugins may require newer Java versions if used.
+The Flow Framework `main` branch targets JDK 21. To ease backporting to `2.x`, maintain compatibility with JDK 11 unless significant benefits can be gained. Other plugins may require newer Java versions if used.
 
 ### Setup
 
 1. Clone the repository (see [Forking and Cloning](#forking-and-cloning))
-2. Make sure `JAVA_HOME` is pointing to a Java 14 JDK (see [Install Prerequisites](#install-prerequisites))
+2. Make sure `JAVA_HOME` is pointing to a Java 21 or higher JDK (see [Install Prerequisites](#install-prerequisites))
 3. Launch IntelliJ IDEA, choose Import Project, and select the settings.gradle file in the root of this package.
 
 ### Build

diff --git a/build.gradle b/build.gradle
@@ -149,8 +149,8 @@ allprojects {
 }
 
 java {
-    targetCompatibility = JavaVersion.VERSION_11
-    sourceCompatibility = JavaVersion.VERSION_11
+    targetCompatibility = JavaVersion.VERSION_21
+    sourceCompatibility = JavaVersion.VERSION_21
 }
 
 repositories {
@@ -181,6 +181,8 @@ dependencies {
 
     // ZipArchive dependencies used for integration tests
     zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}"
+    zipArchive group: 'org.opensearch.plugin', name:'opensearch-knn', version: "${opensearch_build}"
+    zipArchive group: 'org.opensearch.plugin', name:'neural-search', version: "${opensearch_build}"
     secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"
 
     configurations.all {
@@ -492,7 +494,29 @@ List<Provider<RegularFile>> plugins = [
                 return new RegularFile() {
                     @Override
                     File getAsFile() {
-                        return configurations.zipArchive.asFileTree.getSingleFile()
+                        return configurations.zipArchive.asFileTree.matching{include "**/opensearch-ml-plugin-${opensearch_build}.zip"}.getSingleFile()
+                    }
+                }
+            }
+        }),
+        provider(new Callable<RegularFile>(){
+            @Override
+            RegularFile call() throws Exception {
+                return new RegularFile() {
+                    @Override
+                    File getAsFile() {
+                        return configurations.zipArchive.asFileTree.matching{include "**/opensearch-knn-${opensearch_build}.zip"}.getSingleFile()
+                    }
+                }
+            }
+        }),
+        provider(new Callable<RegularFile>(){
+            @Override
+            RegularFile call() throws Exception {
+                return new RegularFile() {
+                    @Override
+                    File getAsFile() {
+                        return configurations.zipArchive.asFileTree.matching{include "**/neural-search-${opensearch_build}.zip"}.getSingleFile()
                     }
                 }
             }

diff --git a/src/main/java/org/opensearch/flowframework/common/CommonValue.java b/src/main/java/org/opensearch/flowframework/common/CommonValue.java
@@ -227,4 +227,6 @@ private CommonValue() {}
     public static final String CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN = "create_connector.credential.session_token";
     /** The field name for ingest pipeline model ID substitution */
     public static final String CREATE_INGEST_PIPELINE_MODEL_ID = "create_ingest_pipeline.model_id";
+    /** The field name for reindex source index substitution */
+    public static final String REINDEX_SOURCE_INDEX = "reindex.source_index";
 }
diff --git a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java
@@ -22,6 +22,7 @@
 import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY;
 import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN;
 import static org.opensearch.flowframework.common.CommonValue.CREATE_INGEST_PIPELINE_MODEL_ID;
+import static org.opensearch.flowframework.common.CommonValue.REINDEX_SOURCE_INDEX;
 
 /**
  * Enum encapsulating the different default use cases and templates we have stored
@@ -132,6 +133,36 @@ public enum DefaultUseCases {
         "defaults/conversational-search-defaults.json",
         "substitutionTemplates/conversational-search-with-cohere-model-template.json",
         List.of(CREATE_CONNECTOR_CREDENTIAL_KEY)
+    ),
+    /** defaults file and substitution ready template for semantic search with a local pretrained model*/
+    SEMANTIC_SEARCH_WITH_LOCAL_MODEL(
+        "semantic_search_with_local_model",
+        "defaults/semantic-search-with-local-model-defaults.json",
+        "substitutionTemplates/semantic-search-with-local-model-template.json",
+        Collections.emptyList()
+
+    ),
+    /** defaults file and substitution ready template for hybrid search with a local pretrained model*/
+    HYBRID_SEARCH_WITH_LOCAL_MODEL(
+        "hybrid_search_with_local_model",
+        "defaults/hybrid-search-with-local-model-defaults.json",
+        "substitutionTemplates/hybrid-search-with-local-model-template.json",
+        Collections.emptyList()
+    ),
+    /** defaults file and substitution ready template for semantic search with reindex command*/
+    SEMANTIC_SEARCH_WITH_REINDEX(
+        "semantic_search_with_reindex",
+        "defaults/semantic-search-with-reindex-defaults.json",
+        "substitutionTemplates/semantic-search-with-reindex-template.json",
+        List.of(CREATE_CONNECTOR_CREDENTIAL_KEY, REINDEX_SOURCE_INDEX)
+    ),
+
+    /** defaults file and substitution ready template for conversational search with bedrock chat model*/
+    CONVERSATIONAL_SEARCH_WITH_BEDROCK_DEPLOY(
+        "conversational-search-with-bedrock-rag-tool",
+        "defaults/conversational-search-rag-tool-defaults.json",
+        "substitutionTemplates/conversational-search-with-bedrock-rag-tool-template.json",
+        List.of(CREATE_CONNECTOR_CREDENTIAL_ACCESS_KEY, CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY, CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN)
     );
 
     private final String useCaseName;

diff --git a/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java b/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java
@@ -95,10 +95,20 @@ public PlainActionFuture<WorkflowData> execute(
             Float requestsPerSecond = inputs.containsKey(REQUESTS_PER_SECOND)
                 ? Float.parseFloat(inputs.get(REQUESTS_PER_SECOND).toString())
                 : null;
+            requestsPerSecond = (requestsPerSecond != null && requestsPerSecond < 0) ? Float.POSITIVE_INFINITY : requestsPerSecond; // stays null when the input is absent; the null check prevents an unboxing NPE
             Boolean requireAlias = inputs.containsKey(REQUIRE_ALIAS) ? Booleans.parseBoolean(inputs.get(REQUIRE_ALIAS).toString()) : null;
-            Integer slices = (Integer) inputs.get(SLICES);
-            Integer maxDocs = (Integer) inputs.get(MAX_DOCS);
-
+            Integer slices;
+            Integer maxDocs;
+            if (inputs.get(SLICES) != null) {
+                slices = Integer.parseInt(String.valueOf(inputs.get(SLICES)));
+            } else {
+                slices = (Integer) inputs.get(SLICES);
+            }
+            if (inputs.get(MAX_DOCS) != null) {
+                maxDocs = Integer.parseInt(String.valueOf(inputs.get(MAX_DOCS)));
+            } else {
+                maxDocs = (Integer) inputs.get(MAX_DOCS);
+            }
             ReindexRequest reindexRequest = new ReindexRequest().setSourceIndices(Strings.splitStringByCommaToArray(sourceIndices))
                 .setDestIndex(destinationIndex);
 

diff --git a/src/main/resources/defaults/conversational-search-rag-tool-defaults.json b/src/main/resources/defaults/conversational-search-rag-tool-defaults.json
@@ -0,0 +1,35 @@
+{
+    "template.name": "deploy-bedrock-chat-model",
+    "template.description": "A template to deploy a Bedrock chat model",
+    "create_bedrock_connector.name": "Amazon Bedrock Connector: Claude Instant V1",
+    "create_bedrock_connector.description": "The connector to bedrock Claude model",
+    "create_bedrock_connector.protocol": "aws_sigv4",
+    "create_bedrock_connector.actions.url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/anthropic.claude-instant-v1/invoke",
+    "create_bedrock_connector.actions.request_body": "{\"prompt\":\"${parameters.prompt}\", \"max_tokens_to_sample\":${parameters.max_tokens_to_sample}, \"temperature\":${parameters.temperature},  \"anthropic_version\":\"${parameters.anthropic_version}\" }",
+    "create_bedrock_connector.credential.access_key": "",
+    "create_bedrock_connector.credential.secret_key": "",
+    "create_bedrock_connector.credential.session_token": "",
+    "create_bedrock_connector.region": "us-west-2",
+    "create_embedding_connector.name": "Amazon Bedrock Connector: embedding",
+    "create_embedding_connector.description": "The connector to bedrock Titan embedding model",
+    "create_embedding_connector.protocol": "aws_sigv4",
+    "create_embedding_connector.actions.url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/amazon.titan-embed-text-v2:0/invoke",
+    "create_embedding_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText}\" }",
+    "register_bedrock_model.name": "anthropic.claude-v2",
+    "register_bedrock_model.description": "bedrock-chat-model",
+    "register_bedrock_embedding_model.name": "Bedrock embedding model v2",
+    "register_bedrock_embedding_model.description": "Bedrock embedding model v2",
+    "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
+    "create_ingest_pipeline.description": "A neural ingest pipeline",
+    "text_embedding.field_map.input": "passage_text",
+    "text_embedding.field_map.output": "passage_embedding",
+    "create_index.name": "my-nlp-index",
+    "create_index.mappings.method.engine": "lucene",
+    "create_index.mappings.method.space_type": "l2",
+    "create_index.mappings.method.name": "hnsw",
+    "text_embedding.field_map.output.dimension": "1024",
+    "rag_tool.parameters.prompt": "\n\nHuman:You are a professional data analyst. You will always answer question based on the given context first. If the answer is not directly shown in the context, you will analyze the data and find the answer. If you don't know the answer, just say don't know. \n\n Context:\n${parameters.output_field:-}\n\n\nHuman:${parameters.question}\n\nAssistant:",
+    "root_agent.parameters.parameters": "Answer the question as best you can.",
+    "root_agent.name": "Root agent",
+    "root_agent.description": "this is the root agent"
+}
diff --git a/src/main/resources/defaults/hybrid-search-defaults.json b/src/main/resources/defaults/hybrid-search-defaults.json
@@ -14,6 +14,5 @@
     "text_embedding.field_map.output.dimension": "1024",
     "create_search_pipeline.pipeline_id": "nlp-search-pipeline",
     "normalization-processor.normalization.technique": "min_max",
-    "normalization-processor.combination.technique": "arithmetic_mean",
-    "normalization-processor.combination.parameters.weights": "[0.3, 0.7]"
+    "normalization-processor.combination.technique": "arithmetic_mean"
 }
diff --git a/src/main/resources/defaults/hybrid-search-with-local-model-defaults.json b/src/main/resources/defaults/hybrid-search-with-local-model-defaults.json
@@ -0,0 +1,23 @@
+{
+  "template.name": "hybrid-search",
+  "template.description": "Setting up hybrid search, ingest pipeline and index",
+  "register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
+  "register_local_pretrained_model.description": "This is a sentence transformer model",
+  "register_local_pretrained_model.model_format": "TORCH_SCRIPT",
+  "register_local_pretrained_model.deploy": "true",
+  "register_local_pretrained_model.version": "1.0.1",
+  "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
+  "create_ingest_pipeline.description": "A text embedding pipeline",
+  "create_ingest_pipeline.model_id": "123",
+  "text_embedding.field_map.input": "passage_text",
+  "text_embedding.field_map.output": "passage_embedding",
+  "create_index.name": "my-nlp-index",
+  "create_index.settings.number_of_shards": "2",
+  "create_index.mappings.method.engine": "lucene",
+  "create_index.mappings.method.space_type": "l2",
+  "create_index.mappings.method.name": "hnsw",
+  "text_embedding.field_map.output.dimension": "768",
+  "create_search_pipeline.pipeline_id": "nlp-search-pipeline",
+  "normalization-processor.normalization.technique": "min_max",
+  "normalization-processor.combination.technique": "arithmetic_mean"
+}
diff --git a/src/main/resources/defaults/multi-modal-search-defaults.json b/src/main/resources/defaults/multi-modal-search-defaults.json
@@ -11,5 +11,7 @@
     "create_index.settings.number_of_shards": "2",
     "text_image_embedding.field_map.output.dimension": "1024",
     "create_index.mappings.method.engine": "lucene",
-    "create_index.mappings.method.name": "hnsw"
+    "create_index.mappings.method.name": "hnsw",
+    "text_image_embedding.field_map.image.type": "text",
+    "text_image_embedding.field_map.text.type": "text"
 }
diff --git a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json
@@ -24,5 +24,7 @@
     "create_index.settings.number_of_shards": "2",
     "text_image_embedding.field_map.output.dimension": "1024",
     "create_index.mappings.method.engine": "lucene",
-    "create_index.mappings.method.name": "hnsw"
+    "create_index.mappings.method.name": "hnsw",
+    "text_image_embedding.field_map.image.type": "text",
+    "text_image_embedding.field_map.text.type": "text"
 }
diff --git a/src/main/resources/defaults/semantic-search-with-local-model-defaults.json b/src/main/resources/defaults/semantic-search-with-local-model-defaults.json
@@ -0,0 +1,20 @@
+{
+  "template.name": "semantic search with local pretrained model",
+  "template.description": "Setting up semantic search, with a local pretrained embedding model",
+  "register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
+  "register_local_pretrained_model.description": "This is a sentence transformer model",
+  "register_local_pretrained_model.model_format": "TORCH_SCRIPT",
+  "register_local_pretrained_model.deploy": "true",
+  "register_local_pretrained_model.version": "1.0.1",
+  "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
+  "create_ingest_pipeline.description": "A text embedding pipeline",
+  "text_embedding.field_map.input": "passage_text",
+  "text_embedding.field_map.output": "passage_embedding",
+  "create_index.name": "my-nlp-index",
+  "create_index.settings.number_of_shards": "2",
+  "create_index.mappings.method.engine": "lucene",
+  "create_index.mappings.method.space_type": "l2",
+  "create_index.mappings.method.name": "hnsw",
+  "text_embedding.field_map.output.dimension": "768",
+  "create_search_pipeline.pipeline_id": "default_model_pipeline"
+}
diff --git a/src/main/resources/defaults/semantic-search-with-reindex-defaults.json b/src/main/resources/defaults/semantic-search-with-reindex-defaults.json
@@ -0,0 +1,31 @@
+{
+    "template.name": "semantic search with cohere embedding",
+    "template.description": "Setting up semantic search, with a Cohere embedding model",
+    "create_connector.name": "cohere-embedding-connector",
+    "create_connector.description": "The connector to Cohere's public embed API",
+    "create_connector.protocol": "http",
+    "create_connector.model": "embed-english-v3.0",
+    "create_connector.input_type": "search_document",
+    "create_connector.truncate": "end",
+    "create_connector.credential.key": "123",
+    "create_connector.actions.url": "https://api.cohere.ai/v1/embed",
+    "create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
+    "create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
+    "create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
+    "register_remote_model.name": "Cohere english embed model",
+    "register_remote_model.description": "cohere-embedding-model",
+    "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
+    "create_ingest_pipeline.description": "A text embedding pipeline",
+    "text_embedding.field_map.input": "passage_text",
+    "text_embedding.field_map.output": "passage_embedding",
+    "create_index.name": "my-nlp-index",
+    "create_index.settings.number_of_shards": "2",
+    "create_index.mappings.method.engine": "lucene",
+    "create_index.mappings.method.space_type": "l2",
+    "create_index.mappings.method.name": "hnsw",
+    "text_embedding.field_map.output.dimension": "1024",
+    "create_search_pipeline.pipeline_id": "default_model_pipeline",
+    "reindex.source_index": "",
+    "reindex.requests_per_second": "-1",
+    "reindex.slices": "1"
+}