From a941b689cae10bb9136df61ccd41e60ebc8e9db4 Mon Sep 17 00:00:00 2001 From: Daniel Widdis Date: Thu, 6 Jun 2024 17:18:55 -0700 Subject: [PATCH 1/4] Set Java target compatibility to JDK 21 (#730) Signed-off-by: Daniel Widdis --- .github/workflows/CI.yml | 20 ++++++++------------ .github/workflows/test_bwc.yml | 2 +- .github/workflows/test_security.yml | 2 +- CHANGELOG.md | 3 +++ DEVELOPER_GUIDE.md | 4 ++-- build.gradle | 4 ++-- 6 files changed, 17 insertions(+), 18 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 968da85e3..1b920f3ce 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -14,10 +14,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - # Spotless requires JDK 17+ - uses: actions/setup-java@v4 with: - java-version: 17 + java-version: 21 distribution: temurin - name: Spotless Check run: ./gradlew spotlessCheck @@ -26,6 +25,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-java@v4 + with: + java-version: 21 + distribution: temurin - name: Javadoc CheckStyle run: ./gradlew checkstyleMain - name: Javadoc Check @@ -35,11 +38,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-13, windows-latest] - java: [11, 21] - include: - - os: ubuntu-latest - java: 17 - codecov: yes + java: [21] name: Test JDK${{ matrix.java }}, ${{ matrix.os }} runs-on: ${{ matrix.os }} steps: @@ -53,7 +52,7 @@ jobs: run: | ./gradlew check -x integTest -x yamlRestTest -x spotlessJava - name: Upload Coverage Report - if: ${{ matrix.codecov }} + if: contains(matrix.os, 'ubuntu') && contains(matrix.java, '21') uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} @@ -65,10 +64,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-13, windows-latest] - java: [11, 21] - include: - - os: ubuntu-latest - java: 17 + java: [21] name: Integ Test JDK${{ matrix.java }}, ${{ matrix.os }} runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/test_bwc.yml b/.github/workflows/test_bwc.yml index b5f2e2d16..bdb57733f 100644 --- a/.github/workflows/test_bwc.yml +++ b/.github/workflows/test_bwc.yml @@ -11,7 +11,7 @@ jobs: Build-ff-linux: strategy: matrix: - java: [11,17,21] + java: [21] fail-fast: false name: Test Flow Framework BWC diff --git a/.github/workflows/test_security.yml b/.github/workflows/test_security.yml index fafcec0fa..a1c77f39e 100644 --- a/.github/workflows/test_security.yml +++ b/.github/workflows/test_security.yml @@ -16,7 +16,7 @@ jobs: integ-test-with-security-linux: strategy: matrix: - java: [11, 17, 21] + java: [21] name: Run Security Integration Tests on Linux runs-on: ubuntu-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index 080555235..24db89106 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) ### Enhancements ### Bug Fixes ### Infrastructure +- Set Java target compatibility to JDK 21 ([#730](https://github.com/opensearch-project/flow-framework/pull/730)) + ### Documentation ### Maintenance ### Refactoring @@ -17,6 +19,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) ### Enhancements - Add Workflow Step for Reindex from source index to destination ([#718](https://github.com/opensearch-project/flow-framework/pull/718)) - Add param to delete workflow API to clear status even if resources exist ([#719](https://github.com/opensearch-project/flow-framework/pull/719)) + ### Bug Fixes - Add user mapping to Workflow State index 
([#705](https://github.com/opensearch-project/flow-framework/pull/705)) diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index b4149ae6d..1f7d5f690 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -24,12 +24,12 @@ See [OpenSearch requirements](https://github.com/opensearch-project/OpenSearch/b #### Java -Flow Framework code currently maintains compatibility with JDK 11. Other plugins may require newer Java versions if used. +The Flow Framework `main` branch targets JDK 21. To ease backporting to `2.x`, maintain compatibility with JDK 11 unless significant benefits can be gained. Other plugins may require newer Java versions if used. ### Setup 1. Clone the repository (see [Forking and Cloning](#forking-and-cloning)) -2. Make sure `JAVA_HOME` is pointing to a Java 14 JDK (see [Install Prerequisites](#install-prerequisites)) +2. Make sure `JAVA_HOME` is pointing to a Java 21 or higher JDK (see [Install Prerequisites](#install-prerequisites)) 3. Launch Intellij IDEA, Choose Import Project and select the settings.gradle file in the root of this package. ### Build diff --git a/build.gradle b/build.gradle index 4f5f30a59..ef703f06f 100644 --- a/build.gradle +++ b/build.gradle @@ -149,8 +149,8 @@ allprojects { } java { - targetCompatibility = JavaVersion.VERSION_11 - sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_21 + sourceCompatibility = JavaVersion.VERSION_21 } repositories { From 53126cbe1a7b6bb6bf41196039b811d5436328a5 Mon Sep 17 00:00:00 2001 From: Amit Galitzky Date: Fri, 7 Jun 2024 18:16:10 -0700 Subject: [PATCH 2/4] Adding additional default use cases (#731) * adding pretrained model templates Signed-off-by: Amit Galitzky * adding reindex Signed-off-by: Amit Galitzky * changing file structure for bwc Signed-off-by: Amit Galitzky --------- Signed-off-by: Amit Galitzky --- CHANGELOG.md | 2 +- build.gradle | 26 +++- .../flowframework/common/CommonValue.java | 2 + .../flowframework/common/DefaultUseCases.java | 23 +++ .../flowframework/workflow/ReindexStep.java | 16 ++- .../defaults/hybrid-search-defaults.json | 3 +- ...brid-search-with-local-model-defaults.json | 23 +++ .../defaults/multi-modal-search-defaults.json | 4 +- ...timodal-search-bedrock-titan-defaults.json | 4 +- ...ntic-search-with-local-model-defaults.json | 20 +++ ...semantic-search-with-reindex-defaults.json | 31 ++++ .../hybrid-search-template.json | 8 +- ...brid-search-with-local-model-template.json | 109 ++++++++++++++ .../multi-modal-search-template.json | 7 +- ...al-search-with-bedrock-titan-template.json | 7 +- ...eural-sparse-local-biencoder-template.json | 3 - .../semantic-search-template.json | 3 - ...ntic-search-with-local-model-template.json | 86 +++++++++++ ...ith-model-and-query-enricher-template.json | 3 - .../semantic-search-with-model-template.json | 3 - ...c-search-with-query-enricher-template.json | 3 - ...semantic-search-with-reindex-template.json | 135 ++++++++++++++++++ .../FlowFrameworkRestTestCase.java | 71 ++++++++- .../rest/FlowFrameworkRestApiIT.java | 84 ++++++++++- 24 files changed, 630 insertions(+), 46 deletions(-) create mode 100644 src/main/resources/defaults/hybrid-search-with-local-model-defaults.json create mode 100644 src/main/resources/defaults/semantic-search-with-local-model-defaults.json create mode 100644 src/main/resources/defaults/semantic-search-with-reindex-defaults.json create mode 100644 src/main/resources/substitutionTemplates/hybrid-search-with-local-model-template.json create mode 100644 
src/main/resources/substitutionTemplates/semantic-search-with-local-model-template.json create mode 100644 src/main/resources/substitutionTemplates/semantic-search-with-reindex-template.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 24db89106..5c475bb3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) ### Enhancements - Add Workflow Step for Reindex from source index to destination ([#718](https://github.com/opensearch-project/flow-framework/pull/718)) - Add param to delete workflow API to clear status even if resources exist ([#719](https://github.com/opensearch-project/flow-framework/pull/719)) - +- Add additional default use cases ([#731](https://github.com/opensearch-project/flow-framework/pull/731)) ### Bug Fixes - Add user mapping to Workflow State index ([#705](https://github.com/opensearch-project/flow-framework/pull/705)) diff --git a/build.gradle b/build.gradle index ef703f06f..698ae8a56 100644 --- a/build.gradle +++ b/build.gradle @@ -181,6 +181,8 @@ dependencies { // ZipArchive dependencies used for integration tests zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}" + zipArchive group: 'org.opensearch.plugin', name:'opensearch-knn', version: "${opensearch_build}" + zipArchive group: 'org.opensearch.plugin', name:'neural-search', version: "${opensearch_build}" secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}" configurations.all { @@ -492,7 +494,29 @@ List> plugins = [ return new RegularFile() { @Override File getAsFile() { - return configurations.zipArchive.asFileTree.getSingleFile() + return configurations.zipArchive.asFileTree.matching{include "**/opensearch-ml-plugin-${opensearch_build}.zip"}.getSingleFile() + } + } + } + }), + provider(new Callable(){ + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + return configurations.zipArchive.asFileTree.matching{include "**/opensearch-knn-${opensearch_build}.zip"}.getSingleFile() + } + } + } + }), + provider(new Callable(){ + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + return configurations.zipArchive.asFileTree.matching{include "**/neural-search-${opensearch_build}.zip"}.getSingleFile() } } } diff --git a/src/main/java/org/opensearch/flowframework/common/CommonValue.java b/src/main/java/org/opensearch/flowframework/common/CommonValue.java index 87c2f2180..10a23357a 100644 --- a/src/main/java/org/opensearch/flowframework/common/CommonValue.java +++ b/src/main/java/org/opensearch/flowframework/common/CommonValue.java @@ -225,4 +225,6 @@ private CommonValue() {} public static final String CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN = "create_connector.credential.session_token"; /** The field name for ingest pipeline model ID substitution */ public static final String CREATE_INGEST_PIPELINE_MODEL_ID = "create_ingest_pipeline.model_id"; + /** The field name for reindex source index substitution */ + public static final String REINDEX_SOURCE_INDEX = "reindex.source_index"; } diff --git a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java index bc88f2b4d..7b8d06f1a 100644 --- a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java +++ 
b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java @@ -22,6 +22,7 @@ import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY; import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN; import static org.opensearch.flowframework.common.CommonValue.CREATE_INGEST_PIPELINE_MODEL_ID; +import static org.opensearch.flowframework.common.CommonValue.REINDEX_SOURCE_INDEX; /** * Enum encapsulating the different default use cases and templates we have stored @@ -132,6 +133,28 @@ public enum DefaultUseCases { "defaults/conversational-search-defaults.json", "substitutionTemplates/conversational-search-with-cohere-model-template.json", List.of(CREATE_CONNECTOR_CREDENTIAL_KEY) + ), + /** defaults file and substitution ready template for semantic search with a local pretrained model*/ + SEMANTIC_SEARCH_WITH_LOCAL_MODEL( + "semantic_search_with_local_model", + "defaults/semantic-search-with-local-model-defaults.json", + "substitutionTemplates/semantic-search-with-local-model-template.json", + Collections.emptyList() + + ), + /** defaults file and substitution ready template for hybrid search with a local pretrained model*/ + HYBRID_SEARCH_WITH_LOCAL_MODEL( + "hybrid_search_with_local_model", + "defaults/hybrid-search-with-local-model-defaults.json", + "substitutionTemplates/hybrid-search-with-local-model-template.json", + Collections.emptyList() + ), + /** defaults file and substitution ready template for semantic search with reindex command*/ + SEMANTIC_SEARCH_WITH_REINDEX( + "semantic_search_with_reindex", + "defaults/semantic-search-with-reindex-defaults.json", + "substitutionTemplates/semantic-search-with-reindex-template.json", + List.of(CREATE_CONNECTOR_CREDENTIAL_KEY, REINDEX_SOURCE_INDEX) ); private final String useCaseName; diff --git a/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java b/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java index bc335db97..b46ddecab 100644 --- a/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java +++ b/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java @@ -95,10 +95,20 @@ public PlainActionFuture execute( Float requestsPerSecond = inputs.containsKey(REQUESTS_PER_SECOND) ? Float.parseFloat(inputs.get(REQUESTS_PER_SECOND).toString()) : null; + requestsPerSecond = requestsPerSecond < 0 ? Float.POSITIVE_INFINITY : requestsPerSecond; Boolean requireAlias = inputs.containsKey(REQUIRE_ALIAS) ? 
Booleans.parseBoolean(inputs.get(REQUIRE_ALIAS).toString()) : null; - Integer slices = (Integer) inputs.get(SLICES); - Integer maxDocs = (Integer) inputs.get(MAX_DOCS); - + Integer slices; + Integer maxDocs; + if (inputs.get(SLICES) != null) { + slices = Integer.parseInt(String.valueOf(inputs.get(SLICES))); + } else { + slices = (Integer) inputs.get(SLICES); + } + if (inputs.get(MAX_DOCS) != null) { + maxDocs = Integer.parseInt(String.valueOf(inputs.get(MAX_DOCS))); + } else { + maxDocs = (Integer) inputs.get(MAX_DOCS); + } ReindexRequest reindexRequest = new ReindexRequest().setSourceIndices(Strings.splitStringByCommaToArray(sourceIndices)) .setDestIndex(destinationIndex); diff --git a/src/main/resources/defaults/hybrid-search-defaults.json b/src/main/resources/defaults/hybrid-search-defaults.json index cf9fb584b..b64bce6ae 100644 --- a/src/main/resources/defaults/hybrid-search-defaults.json +++ b/src/main/resources/defaults/hybrid-search-defaults.json @@ -14,6 +14,5 @@ "text_embedding.field_map.output.dimension": "1024", "create_search_pipeline.pipeline_id": "nlp-search-pipeline", "normalization-processor.normalization.technique": "min_max", - "normalization-processor.combination.technique": "arithmetic_mean", - "normalization-processor.combination.parameters.weights": "[0.3, 0.7]" + "normalization-processor.combination.technique": "arithmetic_mean" } diff --git a/src/main/resources/defaults/hybrid-search-with-local-model-defaults.json b/src/main/resources/defaults/hybrid-search-with-local-model-defaults.json new file mode 100644 index 000000000..d07cc918d --- /dev/null +++ b/src/main/resources/defaults/hybrid-search-with-local-model-defaults.json @@ -0,0 +1,23 @@ +{ + "template.name": "hybrid-search", + "template.description": "Setting up hybrid search, ingest pipeline and index", + "register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2", + "register_local_pretrained_model.description": "This is a sentence transformer model", + "register_local_pretrained_model.model_format": "TORCH_SCRIPT", + "register_local_pretrained_model.deploy": "true", + "register_local_pretrained_model.version": "1.0.1", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "create_ingest_pipeline.model_id": "123", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "768", + "create_search_pipeline.pipeline_id": "nlp-search-pipeline", + "normalization-processor.normalization.technique": "min_max", + "normalization-processor.combination.technique": "arithmetic_mean" +} diff --git a/src/main/resources/defaults/multi-modal-search-defaults.json b/src/main/resources/defaults/multi-modal-search-defaults.json index 0588e7182..4e0f86449 100644 --- a/src/main/resources/defaults/multi-modal-search-defaults.json +++ b/src/main/resources/defaults/multi-modal-search-defaults.json @@ -11,5 +11,7 @@ "create_index.settings.number_of_shards": "2", "text_image_embedding.field_map.output.dimension": "1024", "create_index.mappings.method.engine": "lucene", - "create_index.mappings.method.name": "hnsw" + "create_index.mappings.method.name": "hnsw", + 
"text_image_embedding.field_map.image.type": "text", + "text_image_embedding.field_map.text.type": "text" } diff --git a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json index b6d6a0ff9..3a6a09b21 100644 --- a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json +++ b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json @@ -24,5 +24,7 @@ "create_index.settings.number_of_shards": "2", "text_image_embedding.field_map.output.dimension": "1024", "create_index.mappings.method.engine": "lucene", - "create_index.mappings.method.name": "hnsw" + "create_index.mappings.method.name": "hnsw", + "text_image_embedding.field_map.image.type": "text", + "text_image_embedding.field_map.text.type": "text" } diff --git a/src/main/resources/defaults/semantic-search-with-local-model-defaults.json b/src/main/resources/defaults/semantic-search-with-local-model-defaults.json new file mode 100644 index 000000000..89fad8465 --- /dev/null +++ b/src/main/resources/defaults/semantic-search-with-local-model-defaults.json @@ -0,0 +1,20 @@ +{ + "template.name": "semantic search with local pretrained model", + "template.description": "Setting up semantic search, with a local pretrained embedding model", + "register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2", + "register_local_pretrained_model.description": "This is a sentence transformer model", + "register_local_pretrained_model.model_format": "TORCH_SCRIPT", + "register_local_pretrained_model.deploy": "true", + "register_local_pretrained_model.version": "1.0.1", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "768", + "create_search_pipeline.pipeline_id": "default_model_pipeline" +} diff --git a/src/main/resources/defaults/semantic-search-with-reindex-defaults.json b/src/main/resources/defaults/semantic-search-with-reindex-defaults.json new file mode 100644 index 000000000..b59780ee9 --- /dev/null +++ b/src/main/resources/defaults/semantic-search-with-reindex-defaults.json @@ -0,0 +1,31 @@ +{ + "template.name": "semantic search with cohere embedding", + "template.description": "Setting up semantic search, with a Cohere embedding model", + "create_connector.name": "cohere-embedding-connector", + "create_connector.description": "The connector to Cohere's public embed API", + "create_connector.protocol": "http", + "create_connector.model": "embed-english-v3.0", + "create_connector.input_type": "search_document", + "create_connector.truncate": "end", + "create_connector.credential.key": "123", + "create_connector.actions.url": "https://api.cohere.ai/v1/embed", + "create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }", + "create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding", + "create_connector.actions.post_process_function": 
"connector.post_process.cohere.embedding", + "register_remote_model.name": "Cohere english embed model", + "register_remote_model.description": "cohere-embedding-model", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "1024", + "create_search_pipeline.pipeline_id": "default_model_pipeline", + "reindex.source_index": "", + "reindex.requests_per_second": "-1", + "reindex.slices": "1" +} diff --git a/src/main/resources/substitutionTemplates/hybrid-search-template.json b/src/main/resources/substitutionTemplates/hybrid-search-template.json index 9e16f1d09..1669ba7a7 100644 --- a/src/main/resources/substitutionTemplates/hybrid-search-template.json +++ b/src/main/resources/substitutionTemplates/hybrid-search-template.json @@ -50,9 +50,6 @@ "mappings": { "_doc": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", @@ -86,10 +83,7 @@ "technique": "${{normalization-processor.normalization.technique}}" }, "combination": { - "technique": "${{normalization-processor.combination.technique}}", - "parameters": { - "weights": "${{normalization-processor.combination.parameters.weights}}" - } + "technique": "${{normalization-processor.combination.technique}}" } } } diff --git a/src/main/resources/substitutionTemplates/hybrid-search-with-local-model-template.json b/src/main/resources/substitutionTemplates/hybrid-search-with-local-model-template.json new file mode 100644 index 000000000..457746ab4 --- /dev/null +++ b/src/main/resources/substitutionTemplates/hybrid-search-with-local-model-template.json @@ -0,0 +1,109 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "HYBRID_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "register_local_pretrained_model", + "type": "register_local_pretrained_model", + "user_inputs": { + "name": "${{register_local_pretrained_model.name}}", + "version": "${{register_local_pretrained_model.version}}", + "description": "${{register_local_pretrained_model.description}}", + "model_format": "${{register_local_pretrained_model.model_format}}", + "deploy": true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_local_pretrained_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{register_local_pretrained_model.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + 
"index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}", + "index.search.default_pipeline": "${{create_search_pipeline.pipeline_id}}" + }, + "mappings": { + "properties": { + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + }, + { + "id": "create_search_pipeline", + "type": "create_search_pipeline", + "user_inputs": { + "pipeline_id": "${{create_search_pipeline.pipeline_id}}", + "configurations": { + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "${{normalization-processor.normalization.technique}}" + }, + "combination": { + "technique": "${{normalization-processor.combination.technique}}" + } + } + } + ] + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-template.json index f6a14dc75..bad7f4a52 100644 --- a/src/main/resources/substitutionTemplates/multi-modal-search-template.json +++ b/src/main/resources/substitutionTemplates/multi-modal-search-template.json @@ -51,9 +51,6 @@ "mappings": { "_doc": { "properties": { - "id": { - "type": "text" - }, "${{text_image_embedding.embedding}}": { "type": "knn_vector", "dimension": "${{text_image_embedding.field_map.output.dimension}}", @@ -64,10 +61,10 @@ } }, "${{text_image_embedding.field_map.text}}": { - "type": "text" + "type": "${{text_image_embedding.field_map.text.type}}" }, "${{text_image_embedding.field_map.image}}": { - "type": "binary" + "type": "${{text_image_embedding.field_map.image.type}}" } } } diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json index da85a9387..e36370a73 100644 --- a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json +++ b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json @@ -101,9 +101,6 @@ "mappings": { "_doc": { "properties": { - "id": { - "type": "text" - }, "${{text_image_embedding.embedding}}": { "type": "knn_vector", "dimension": "${{text_image_embedding.field_map.output.dimension}}", @@ -114,10 +111,10 @@ } }, "${{text_image_embedding.field_map.text}}": { - "type": "text" + "type": "${{text_image_embedding.field_map.text.type}}" }, "${{text_image_embedding.field_map.image}}": { - "type": "binary" + "type": "${{text_image_embedding.field_map.image.type}}" } } } diff --git a/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json b/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json index 603e462ee..737d2f438 100644 --- a/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json +++ b/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json @@ -61,9 +61,6 @@ "mappings": { "_doc": { "properties": { - "id": { - "type": "text" - }, 
"${{create_ingest_pipeline.text_embedding.field_map.output}}": { "type": "rank_features" }, diff --git a/src/main/resources/substitutionTemplates/semantic-search-template.json b/src/main/resources/substitutionTemplates/semantic-search-template.json index 3aa7095e1..d592f1ec1 100644 --- a/src/main/resources/substitutionTemplates/semantic-search-template.json +++ b/src/main/resources/substitutionTemplates/semantic-search-template.json @@ -49,9 +49,6 @@ "mappings": { "_doc": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-local-model-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-local-model-template.json new file mode 100644 index 000000000..125554b78 --- /dev/null +++ b/src/main/resources/substitutionTemplates/semantic-search-with-local-model-template.json @@ -0,0 +1,86 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "SEMANTIC_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "register_local_pretrained_model", + "type": "register_local_pretrained_model", + "user_inputs": { + "name": "${{register_local_pretrained_model.name}}", + "version": "${{register_local_pretrained_model.version}}", + "description": "${{register_local_pretrained_model.description}}", + "model_format": "${{register_local_pretrained_model.model_format}}", + "deploy": true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_local_pretrained_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{register_local_pretrained_model.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}" + }, + "mappings": { + "properties": { + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json index f75b58e06..71f8286cd 100644 --- a/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json +++ b/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json @@ -99,9 
+99,6 @@ "mappings": { "_doc": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json index f98c68659..c2261c475 100644 --- a/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json +++ b/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json @@ -98,9 +98,6 @@ "mappings": { "_doc": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json index 4244cd791..6e33d04c5 100644 --- a/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json +++ b/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json @@ -67,9 +67,6 @@ "mappings": { "_doc": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-reindex-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-reindex-template.json new file mode 100644 index 000000000..6460eabdc --- /dev/null +++ b/src/main/resources/substitutionTemplates/semantic-search-with-reindex-template.json @@ -0,0 +1,135 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "SEMANTIC_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_connector", + "type": "create_connector", + "user_inputs": { + "name": "${{create_connector.name}}", + "description": "${{create_connector.description}}", + "version": "1", + "protocol": "${{create_connector.protocol}}", + "parameters": { + "endpoint": "${{create_connector.endpoint}}", + "model": "${{create_connector.model}}", + "input_type": "search_document", + "truncate": "END" + }, + "credential": { + "key": "${{create_connector.credential.key}}" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "${{create_connector.actions.url}}", + "headers": { + "Authorization": "Bearer ${credential.key}", + "Request-Source": "unspecified:opensearch" + }, + "request_body": "${{create_connector.actions.request_body}}", + "pre_process_function": "${{create_connector.actions.pre_process_function}}", + "post_process_function": "${{create_connector.actions.post_process_function}}" + } + ] + } + }, + { + "id": "register_model", + "type": "register_remote_model", + "previous_node_inputs": { + "create_connector": "connector_id" + }, + "user_inputs": { + "name": "${{register_remote_model.name}}", + "function_name": "remote", + "description": "${{register_remote_model.description}}", + "deploy": true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + 
"configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{register_model.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}" + }, + "mappings": { + "properties": { + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + }, + { + "id": "reindex", + "type": "reindex", + "previous_node_inputs": { + "create_index": "index_name" + }, + "user_inputs": { + "source_index": "${{reindex.source_index}}", + "destination_index": "${{create_index.name}}", + "refresh": false, + "requests_per_second": "${{reindex.requests_per_second}}", + "slices": "${{reindex.slices}}" + } + } + ] + } + } +} diff --git a/src/test/java/org/opensearch/flowframework/FlowFrameworkRestTestCase.java b/src/test/java/org/opensearch/flowframework/FlowFrameworkRestTestCase.java index 9a1d89c2e..922c26b0f 100644 --- a/src/test/java/org/opensearch/flowframework/FlowFrameworkRestTestCase.java +++ b/src/test/java/org/opensearch/flowframework/FlowFrameworkRestTestCase.java @@ -48,6 +48,7 @@ import org.opensearch.flowframework.model.State; import org.opensearch.flowframework.model.Template; import org.opensearch.flowframework.model.WorkflowState; +import org.opensearch.flowframework.util.ParseUtils; import org.opensearch.ml.repackage.com.google.common.collect.ImmutableList; import org.opensearch.test.rest.OpenSearchRestTestCase; import org.junit.After; @@ -350,7 +351,7 @@ protected Response createWorkflow(RestClient client, Template template) throws E * @throws Exception if the request fails * @return a rest response */ - protected Response createWorkflowWithUseCase(RestClient client, String useCase, List params) throws Exception { + protected Response createWorkflowWithUseCaseWithNoValidation(RestClient client, String useCase, List params) throws Exception { StringBuilder sb = new StringBuilder(); for (String param : params) { @@ -370,6 +371,28 @@ protected Response createWorkflowWithUseCase(RestClient client, String useCase, ); } + /** + * Helper method to invoke the create workflow API with a use case and also the provision param as true + * @param client the rest client + * @param useCase the usecase to create + * @param defaults the defaults to override given through the request payload + * @throws Exception if the request fails + * @return a rest response + */ + protected Response createAndProvisionWorkflowWithUseCaseWithContent(RestClient client, String useCase, Map defaults) + throws Exception { + String payload = ParseUtils.parseArbitraryStringToObjectMapToString(defaults); + + return TestHelpers.makeRequest( + client, + "POST", + WORKFLOW_URI + "?provision=true&use_case=" + useCase, + Collections.emptyMap(), + payload, 
+ null + ); + } + /** * Helper method to invoke the Create Workflow Rest Action with provision * @param client the rest client @@ -742,6 +765,52 @@ protected GetPipelineResponse getPipelines(String pipelineId) throws IOException } } + protected void ingestSingleDoc(String payload, String indexName) throws IOException { + try { + TestHelpers.makeRequest( + client(), + "PUT", + indexName + "/_doc/1", + null, + payload, + ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "")) + ); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + protected SearchResponse neuralSearchRequest(String indexName, String modelId) throws IOException { + String searchRequest = + "{\"_source\":{\"excludes\":[\"passage_embedding\"]},\"query\":{\"neural\":{\"passage_embedding\":{\"query_text\":\"world\",\"k\":5,\"model_id\":\"" + + modelId + + "\"}}}}"; + try { + Response restSearchResponse = TestHelpers.makeRequest( + client(), + "POST", + indexName + "/_search", + null, + searchRequest, + ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "")) + ); + // Parse entity content into SearchResponse + MediaType mediaType = MediaType.fromMediaType(restSearchResponse.getEntity().getContentType()); + try ( + XContentParser parser = mediaType.xContent() + .createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + restSearchResponse.getEntity().getContent() + ) + ) { + return SearchResponse.fromXContent(parser); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + @SuppressWarnings("unchecked") protected List catPlugins() throws IOException { Response response = TestHelpers.makeRequest( diff --git a/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java b/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java index 7f51cd276..6dfad72f1 100644 --- a/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java +++ b/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java @@ -34,6 +34,7 @@ import java.time.Instant; import java.util.Collections; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -429,7 +430,11 @@ public void testCreateAndProvisionIngestAndSearchPipeline() throws Exception { public void testDefaultCohereUseCase() throws Exception { // Hit Create Workflow API with original template - Response response = createWorkflowWithUseCase(client(), "cohere_embedding_model_deploy", List.of(CREATE_CONNECTOR_CREDENTIAL_KEY)); + Response response = createWorkflowWithUseCaseWithNoValidation( + client(), + "cohere_embedding_model_deploy", + List.of(CREATE_CONNECTOR_CREDENTIAL_KEY) + ); assertEquals(RestStatus.CREATED, TestHelpers.restStatus(response)); Map responseMap = entityAsMap(response); @@ -468,7 +473,7 @@ public void testDefaultSemanticSearchUseCaseWithFailureExpected() throws Excepti // Hit Create Workflow API with original template without required params ResponseException exception = expectThrows( ResponseException.class, - () -> createWorkflowWithUseCase(client(), "semantic_search", Collections.emptyList()) + () -> createWorkflowWithUseCaseWithNoValidation(client(), "semantic_search", Collections.emptyList()) ); assertTrue( exception.getMessage() @@ -476,7 +481,11 @@ public void testDefaultSemanticSearchUseCaseWithFailureExpected() throws Excepti ); // Pass in required params - Response response = createWorkflowWithUseCase(client(), "semantic_search", 
List.of(CREATE_INGEST_PIPELINE_MODEL_ID)); + Response response = createWorkflowWithUseCaseWithNoValidation( + client(), + "semantic_search", + List.of(CREATE_INGEST_PIPELINE_MODEL_ID) + ); assertEquals(RestStatus.CREATED, TestHelpers.restStatus(response)); Map responseMap = entityAsMap(response); @@ -502,7 +511,7 @@ public void testAllDefaultUseCasesCreation() throws Exception { .collect(Collectors.toSet()); for (String useCaseName : allUseCaseNames) { - Response response = createWorkflowWithUseCase( + Response response = createWorkflowWithUseCaseWithNoValidation( client(), useCaseName, DefaultUseCases.getRequiredParamsByUseCaseName(useCaseName) @@ -514,4 +523,71 @@ public void testAllDefaultUseCasesCreation() throws Exception { getAndAssertWorkflowStatus(client(), workflowId, State.NOT_STARTED, ProvisioningProgress.NOT_STARTED); } } + + public void testSemanticSearchWithLocalModelEndToEnd() throws Exception { + // Checking if plugins are part of the integration test cluster so we can continue with this test + List plugins = catPlugins(); + if (!plugins.contains("opensearch-knn") && plugins.contains("opensearch-neural-search")) { + return; + } + Map defaults = new HashMap<>(); + defaults.put("register_local_pretrained_model.name", "huggingface/sentence-transformers/all-MiniLM-L6-v2"); + defaults.put("register_local_pretrained_model.version", "1.0.1"); + defaults.put("text_embedding.field_map.output.dimension", 384); + + Response response = createAndProvisionWorkflowWithUseCaseWithContent(client(), "semantic_search_with_local_model", defaults); + assertEquals(RestStatus.CREATED, TestHelpers.restStatus(response)); + + Map responseMap = entityAsMap(response); + String workflowId = (String) responseMap.get(WORKFLOW_ID); + getAndAssertWorkflowStatus(client(), workflowId, State.PROVISIONING, ProvisioningProgress.IN_PROGRESS); + + // Wait until provisioning has completed successfully before attempting to retrieve created resources + List resourcesCreated = getResourcesCreated(client(), workflowId, 45); + + // This template should create 4 resources, registered model_id, deployed model_id, ingest pipeline, and index name + assertEquals(4, resourcesCreated.size()); + String modelId = resourcesCreated.get(1).resourceId(); + String indexName = resourcesCreated.get(3).resourceId(); + + // Short wait before ingesting data + Thread.sleep(30000); + + String docContent = "{\"passage_text\": \"Hello planet\"\n}"; + ingestSingleDoc(docContent, indexName); + // Short wait before neural search + Thread.sleep(500); + SearchResponse neuralSearchResponse = neuralSearchRequest(indexName, modelId); + assertEquals(neuralSearchResponse.getHits().getHits().length, 1); + Thread.sleep(500); + deleteIndex(indexName); + + // Hit Deprovision API + // By design, this may not completely deprovision the first time if it takes >2s to process removals + Response deprovisionResponse = deprovisionWorkflow(client(), workflowId); + try { + assertBusy( + () -> { getAndAssertWorkflowStatus(client(), workflowId, State.NOT_STARTED, ProvisioningProgress.NOT_STARTED); }, + 30, + TimeUnit.SECONDS + ); + } catch (ComparisonFailure e) { + // 202 return if still processing + assertEquals(RestStatus.ACCEPTED, TestHelpers.restStatus(deprovisionResponse)); + } + if (TestHelpers.restStatus(deprovisionResponse) == RestStatus.ACCEPTED) { + // Short wait before we try again + Thread.sleep(10000); + deprovisionResponse = deprovisionWorkflow(client(), workflowId); + assertBusy( + () -> { getAndAssertWorkflowStatus(client(), workflowId, 
State.NOT_STARTED, ProvisioningProgress.NOT_STARTED); }, + 30, + TimeUnit.SECONDS + ); + } + assertEquals(RestStatus.OK, TestHelpers.restStatus(deprovisionResponse)); + // Hit Delete API + Response deleteResponse = deleteWorkflow(client(), workflowId); + assertEquals(RestStatus.OK, TestHelpers.restStatus(deleteResponse)); + } } From 970472763a5ddac312259325c9ae01911dc015a3 Mon Sep 17 00:00:00 2001 From: Owais Kazi Date: Mon, 10 Jun 2024 09:26:33 -0700 Subject: [PATCH 3/4] Added conversation search default use case with RAG tool (#732) Added conversation search use case with RAG tool Signed-off-by: owaiskazi19 --- CHANGELOG.md | 2 + .../flowframework/common/DefaultUseCases.java | 8 + ...nversational-search-rag-tool-defaults.json | 35 +++ ...search-with-bedrock-rag-tool-template.json | 218 ++++++++++++++++++ ...nal-search-with-cohere-model-template.json | 2 +- 5 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 src/main/resources/defaults/conversational-search-rag-tool-defaults.json create mode 100644 src/main/resources/substitutionTemplates/conversational-search-with-bedrock-rag-tool-template.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c475bb3e..9a6463dd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) - Add Workflow Step for Reindex from source index to destination ([#718](https://github.com/opensearch-project/flow-framework/pull/718)) - Add param to delete workflow API to clear status even if resources exist ([#719](https://github.com/opensearch-project/flow-framework/pull/719)) - Add additional default use cases ([#731](https://github.com/opensearch-project/flow-framework/pull/731)) +- Add conversation search default use case with RAG tool ([#732](https://github.com/opensearch-project/flow-framework/pull/732)) + ### Bug Fixes - Add user mapping to Workflow State index ([#705](https://github.com/opensearch-project/flow-framework/pull/705)) diff --git a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java index 7b8d06f1a..a982fc9c2 100644 --- a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java +++ b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java @@ -155,6 +155,14 @@ public enum DefaultUseCases { "defaults/semantic-search-with-reindex-defaults.json", "substitutionTemplates/semantic-search-with-reindex-template.json", List.of(CREATE_CONNECTOR_CREDENTIAL_KEY, REINDEX_SOURCE_INDEX) + ), + + /** defaults file and substitution ready template for conversational search with bedrock chat model*/ + CONVERSATIONAL_SEARCH_WITH_BEDROCK_DEPLOY( + "conversational-search-with-bedrock-rag-tool", + "defaults/conversational-search-rag-tool-defaults.json", + "substitutionTemplates/conversational-search-with-bedrock-rag-tool-template.json", + List.of(CREATE_CONNECTOR_CREDENTIAL_ACCESS_KEY, CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY, CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN) ); private final String useCaseName; diff --git a/src/main/resources/defaults/conversational-search-rag-tool-defaults.json b/src/main/resources/defaults/conversational-search-rag-tool-defaults.json new file mode 100644 index 000000000..c69d3eb17 --- /dev/null +++ b/src/main/resources/defaults/conversational-search-rag-tool-defaults.json @@ -0,0 +1,35 @@ +{ + "template.name": "deploy-bedrock-chat-model", + "template.description": "A template to deploy a Bedrock chat model", + 
"create_bedrock_connector.name": "Amazon Bedrock Connector: Claude Instant V1", + "create_bedrock_connector.description": "The connector to bedrock Claude model", + "create_bedrock_connector.protocol": "aws_sigv4", + "create_bedrock_connector.actions.url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/anthropic.claude-instant-v1/invoke", + "create_bedrock_connector.actions.request_body": "{\"prompt\":\"${parameters.prompt}\", \"max_tokens_to_sample\":${parameters.max_tokens_to_sample}, \"temperature\":${parameters.temperature}, \"anthropic_version\":\"${parameters.anthropic_version}\" }", + "create_bedrock_connector.credential.access_key": "", + "create_bedrock_connector.credential.secret_key": "", + "create_bedrock_connector.credential.session_token": "", + "create_bedrock_connector.region": "us-west-2", + "create_embedding_connector.name": "Amazon Bedrock Connector: embedding", + "create_embedding_connector.description": "The connector to bedrock Titan embedding model", + "create_embedding_connector.protocol": "aws_sigv4", + "create_embedding_connector.actions.url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/amazon.titan-embed-text-v2:0/invoke", + "create_embedding_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText}\" }", + "register_bedrock_model.name": "anthropic.claude-v2", + "register_bedrock_model.description": "bedrock-chat-model", + "register_bedrock_embedding_model.name": "Bedrock embedding model v2", + "register_bedrock_embedding_model.description": "Bedrock embedding model v2", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A neural ingest pipeline", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "1024", + "rag_tool.parameters.prompt": "\n\nHuman:You are a professional data analysist. You will always answer question based on the given context first. If the answer is not directly shown in the context, you will analyze the data and find the answer. If you don't know the answer, just say don't know. 
\n\n Context:\n${parameters.output_field:-}\n\n\nHuman:${parameters.question}\n\nAssistant:", + "root_agent.parameters.parameters": "Answer the question as best you can.", + "root_agent.name": "Root agent", + "root_agent.description": "this is the root agent" +} diff --git a/src/main/resources/substitutionTemplates/conversational-search-with-bedrock-rag-tool-template.json b/src/main/resources/substitutionTemplates/conversational-search-with-bedrock-rag-tool-template.json new file mode 100644 index 000000000..0c15a84d7 --- /dev/null +++ b/src/main/resources/substitutionTemplates/conversational-search-with-bedrock-rag-tool-template.json @@ -0,0 +1,218 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "CONVERSATION_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_bedrock_connector", + "type": "create_connector", + "user_inputs": { + "name": "${{create_bedrock_connector.name}}", + "description": "Test connector for Amazon Bedrock", + "version": "1", + "protocol": "${{create_bedrock_connector.protocol}}", + "credential": { + "access_key": "${{create_bedrock_connector.credential.access_key}}", + "secret_key": "${{create_bedrock_connector.credential.secret_key}}", + "session_token": "${{create_bedrock_connector.credential.session_token}}" + }, + "parameters": { + "max_tokens_to_sample": "8000", + "service_name": "bedrock", + "temperature": "1.0E-4", + "response_filter": "$.completion", + "region": "${{create_bedrock_connector.region}}", + "anthropic_version": "bedrock-2023-05-31" + }, + "actions": [ + { + "action_type": "PREDICT", + "method": "POST", + "url": "${{create_bedrock_connector.actions.url}}", + "headers": { + "x-amz-content-sha256": "required", + "content-type": "application/json" + }, + "request_body": "${{create_bedrock_connector.actions.request_body}}" + } + ] + } + }, + { + "id": "create_embedding_connector", + "type": "create_connector", + "user_inputs": { + "name": "${{create_embedding_connector.name}}", + "description": "${{create_embedding_connector.description}}", + "version": "1", + "protocol": "${{create_embedding_connector.protocol}}", + "credential": { + "access_key": "${{create_bedrock_connector.credential.access_key}}", + "secret_key": "${{create_bedrock_connector.credential.secret_key}}", + "session_token": "${{create_bedrock_connector.credential.session_token}}" + }, + "parameters": { + "service_name": "bedrock", + "model": "amazon.titan-embed-text-v2:0", + "region": "us-west-2", + "anthropic_version": "bedrock-2023-05-31" + }, + "actions": [ + { + "action_type": "PREDICT", + "method": "POST", + "url": "${{create_embedding_connector.actions.url}}", + "headers": { + "x-amz-content-sha256": "required", + "content-type": "application/json" + }, + "request_body": "${{create_embedding_connector.actions.request_body}}", + "pre_process_function": "connector.pre_process.bedrock.embedding", + "post_process_function": "connector.post_process.bedrock.embedding" + } + ] + } + }, + + { + "id": "register_bedrock_model", + "type": "register_remote_model", + "previous_node_inputs": { + "create_bedrock_connector": "connector_id" + }, + "user_inputs": { + "name": "${{register_bedrock_model.name}}", + "function_name": "remote", + "description": "${{register_bedrock_model.description}}", + "deploy": true + } + }, + { + "id": "register_bedrock_embedding_model", + "type": "register_remote_model", + "previous_node_inputs": { + 
"create_embedding_connector": "connector_id" + }, + "user_inputs": { + "name": "${{register_bedrock_embedding_model.name}}", + "description": "${{register_bedrock_embedding_model.description}}", + "function_name": "remote", + "deploy": true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_bedrock_embedding_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{register_bedrock_embedding_model.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index": { + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "knn": "true" + } + }, + "mappings": { + "properties": { + "${{text_embedding.field_map.input}}": { + "type": "text" + }, + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + }, + "dimension": "${{text_embedding.field_map.output.dimension}}" + } + } + } + } + } + }, + { + "id": "rag_tool", + "type": "create_tool", + "previous_node_inputs": { + "register_bedrock_model": "model_id", + "register_bedrock_embedding_model": "model_id" + }, + "user_inputs": { + "type": "RAGTool", + "name": "RAGTool", + "parameters": { + "inference_model_id": "${{register_bedrock_model.model_id}}", + "embedding_model_id": "${{register_bedrock_embedding_model.model_id}}", + "index": "${{create_index.name}}", + "embedding_field": "${{text_embedding.field_map.output}}", + "source_field": "[\"${{text_embedding.field_map.input}}\"]", + "query_type": "neural", + "input": "${parameters.question}", + "prompt": "${{rag_tool.parameters.prompt}}", + "include_output_in_agent_response": true + } + } + }, + { + "id": "root_agent", + "type": "register_agent", + "previous_node_inputs": { + "rag_tool": "tools" + }, + "user_inputs": { + "parameters": { + "prompt": "${{root_agent.parameters.parameters}}" + }, + "app_type": "chatbot", + "name": "${{root_agent.name}}", + "description": "${{root_agent.description}}", + "tools_order": [ + "rag_tool" + ], + "memory": { + "type": "conversation_index" + }, + "type": "flow" + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/conversational-search-with-cohere-model-template.json b/src/main/resources/substitutionTemplates/conversational-search-with-cohere-model-template.json index 9c919f553..bfac79265 100644 --- a/src/main/resources/substitutionTemplates/conversational-search-with-cohere-model-template.json +++ b/src/main/resources/substitutionTemplates/conversational-search-with-cohere-model-template.json @@ -1,7 +1,7 @@ { "name": "${{template.name}}", "description": "${{template.description}}", - "use_case": "SEMANTIC_SEARCH", + "use_case": "CONVERSATION_SEARCH", "version": { "template": "1.0.0", "compatibility": [ From b535d5e5ca87ff532f2ff4733b5556f0cf8eaeb5 Mon Sep 17 00:00:00 2001 From: Amit Galitzky Date: Mon, 10 Jun 2024 09:26:43 -0700 Subject: [PATCH 
4/4] Change snapshot workflow jdk to 21 (#733)

changed snapshot workflow jdk to 21

Signed-off-by: Amit Galitzky
---
 .github/workflows/publish-snapshots.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/publish-snapshots.yml b/.github/workflows/publish-snapshots.yml
index 7e2719aad..f1b9adea7 100644
--- a/.github/workflows/publish-snapshots.yml
+++ b/.github/workflows/publish-snapshots.yml
@@ -23,7 +23,7 @@ jobs:
       - uses: actions/setup-java@v4
         with:
           distribution: temurin # Temurin is a distribution of adoptium
-          java-version: 17
+          java-version: 21
       - uses: actions/checkout@v4
       - uses: aws-actions/configure-aws-credentials@v4.0.2
         with:
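
A minimal sketch of how a client might exercise one of the new default use cases added in patch 2, assuming a local, security-disabled cluster on localhost:9200. The endpoint path, the provision/use_case query parameters, the 201 response, and the default-override keys mirror FlowFrameworkRestTestCase and testSemanticSearchWithLocalModelEndToEnd above; the class name and host are placeholders, and the snippet uses only the JDK's built-in java.net.http client, in keeping with the series' move to JDK 21.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/**
 * Illustrative sketch: create and provision a "semantic_search_with_local_model"
 * workflow by POSTing default overrides to the Flow Framework plugin.
 * Assumes a local cluster at localhost:9200 with the security plugin disabled.
 */
public class ProvisionUseCaseSketch {
    public static void main(String[] args) throws Exception {
        // Override a few defaults, using the same keys as testSemanticSearchWithLocalModelEndToEnd
        String overrides = "{"
            + "\"register_local_pretrained_model.name\": \"huggingface/sentence-transformers/all-MiniLM-L6-v2\","
            + "\"register_local_pretrained_model.version\": \"1.0.1\","
            + "\"text_embedding.field_map.output.dimension\": \"384\""
            + "}";

        // Same endpoint and query parameters as createAndProvisionWorkflowWithUseCaseWithContent
        HttpRequest request = HttpRequest.newBuilder()
            .uri(URI.create("http://localhost:9200/_plugins/_flow_framework/workflow"
                + "?use_case=semantic_search_with_local_model&provision=true"))
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(overrides))
            .build();

        HttpResponse<String> response = HttpClient.newHttpClient()
            .send(request, HttpResponse.BodyHandlers.ofString());

        // A 201 Created response carries the workflow_id used to poll provisioning status
        System.out.println(response.statusCode() + " " + response.body());
    }
}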