From 18eedf66284edcd752e33fe89be76bd19e55e0c8 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Wed, 23 Oct 2024 15:58:43 -0700 Subject: [PATCH 01/29] feat(index mappings): fetch mappings and version from json file instead of string constants Signed-off-by: Pavan Yekbote --- .../org/opensearch/ml/common/CommonValue.java | 539 +----------------- .../ml/common/utils/IndexUtils.java | 20 + .../ml/common/utils/StringUtils.java | 5 + .../MetricsCorrelation.java | 4 +- .../opensearch/ml/engine/indices/MLIndex.java | 83 ++- .../src/main/resources/mappings/ml-agent.json | 45 ++ .../main/resources/mappings/ml-config.json | 24 + .../main/resources/mappings/ml-connector.json | 95 +++ .../resources/mappings/ml-controller.json | 10 + .../resources/mappings/ml-memory-message.json | 35 ++ .../resources/mappings/ml-memory-meta.json | 24 + .../resources/mappings/ml-model-group.json | 83 +++ .../src/main/resources/mappings/ml-model.json | 243 ++++++++ .../resources/mappings/ml-stop-words.json | 0 .../src/main/resources/mappings/ml-task.json | 86 +++ 15 files changed, 735 insertions(+), 561 deletions(-) create mode 100644 ml-algorithms/src/main/resources/mappings/ml-agent.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-config.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-connector.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-controller.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-memory-message.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-memory-meta.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-model-group.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-model.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-stop-words.json create mode 100644 ml-algorithms/src/main/resources/mappings/ml-task.json diff --git a/common/src/main/java/org/opensearch/ml/common/CommonValue.java b/common/src/main/java/org/opensearch/ml/common/CommonValue.java index 3adaa8ca2e..09081e4b49 100644 --- a/common/src/main/java/org/opensearch/ml/common/CommonValue.java +++ b/common/src/main/java/org/opensearch/ml/common/CommonValue.java @@ -5,39 +5,9 @@ package org.opensearch.ml.common; -import static org.opensearch.ml.common.MLConfig.CONFIG_TYPE_FIELD; -import static org.opensearch.ml.common.MLConfig.LAST_UPDATED_TIME_FIELD; -import static org.opensearch.ml.common.MLConfig.ML_CONFIGURATION_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.APPLICATION_TYPE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_ADDITIONAL_INFO_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_CONVERSATION_ID_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_CREATE_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_INDEX_SCHEMA_VERSION; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_INPUT_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_ORIGIN_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_PROMPT_TEMPLATE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_RESPONSE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_TRACE_NUMBER_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_CREATED_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_INDEX_SCHEMA_VERSION; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_NAME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_UPDATED_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.PARENT_INTERACTIONS_ID_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.USER_FIELD; -import static org.opensearch.ml.common.model.MLModelConfig.ALL_CONFIG_FIELD; -import static org.opensearch.ml.common.model.MLModelConfig.MODEL_TYPE_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.EMBEDDING_DIMENSION_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.FRAMEWORK_TYPE_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.MODEL_MAX_LENGTH_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.NORMALIZE_RESULT_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.POOLING_MODE_FIELD; - import java.util.Set; import org.opensearch.Version; -import org.opensearch.ml.common.agent.MLAgent; -import org.opensearch.ml.common.connector.AbstractConnector; -import org.opensearch.ml.common.controller.MLController; import com.google.common.collect.ImmutableSet; @@ -63,516 +33,27 @@ public class CommonValue { public static final String ML_MODEL_GROUP_INDEX = ".plugins-ml-model-group"; public static final String ML_MODEL_INDEX = ".plugins-ml-model"; public static final String ML_TASK_INDEX = ".plugins-ml-task"; - public static final Integer ML_MODEL_GROUP_INDEX_SCHEMA_VERSION = 2; - public static final Integer ML_MODEL_INDEX_SCHEMA_VERSION = 11; public static final String ML_CONNECTOR_INDEX = ".plugins-ml-connector"; - public static final Integer ML_TASK_INDEX_SCHEMA_VERSION = 3; - public static final Integer ML_CONNECTOR_SCHEMA_VERSION = 3; public static final String ML_CONFIG_INDEX = ".plugins-ml-config"; - public static final Integer ML_CONFIG_INDEX_SCHEMA_VERSION = 4; public static final String ML_CONTROLLER_INDEX = ".plugins-ml-controller"; - public static final Integer ML_CONTROLLER_INDEX_SCHEMA_VERSION = 1; public static final String ML_MAP_RESPONSE_KEY = "response"; public static final String ML_AGENT_INDEX = ".plugins-ml-agent"; - public static final Integer ML_AGENT_INDEX_SCHEMA_VERSION = 2; public static final String ML_MEMORY_META_INDEX = ".plugins-ml-memory-meta"; - public static final Integer ML_MEMORY_META_INDEX_SCHEMA_VERSION = 1; public static final String ML_MEMORY_MESSAGE_INDEX = ".plugins-ml-memory-message"; public static final String ML_STOP_WORDS_INDEX = ".plugins-ml-stop-words"; public static final Set stopWordsIndices = ImmutableSet.of(".plugins-ml-stop-words"); - public static final Integer ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION = 1; - public static final String USER_FIELD_MAPPING = " \"" - + CommonValue.USER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " }\n"; - public static final String ML_MODEL_GROUP_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + ML_MODEL_GROUP_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + MLModelGroup.MODEL_GROUP_NAME_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.DESCRIPTION_FIELD - + "\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"" - + MLModelGroup.LATEST_VERSION_FIELD - + "\": {\n" - + " \"type\": \"integer\"\n" - + " },\n" - + " \"" - + MLModelGroup.MODEL_GROUP_ID_FIELD - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.BACKEND_ROLES_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.ACCESS - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.OWNER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.CREATED_TIME_FIELD - + "\": {\n" - + " \"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModelGroup.LAST_UPDATED_TIME_FIELD - + "\": {\n" - + " \"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONNECTOR_INDEX_FIELDS = " \"properties\": {\n" - + " \"" - + AbstractConnector.NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + AbstractConnector.VERSION_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + AbstractConnector.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + AbstractConnector.PROTOCOL_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + AbstractConnector.PARAMETERS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.CREDENTIAL_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.CLIENT_CONFIG_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.ACTIONS_FIELD - + "\" : {\"type\": \"flat_object\"}\n"; - - public static final String ML_MODEL_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_MODEL_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLModel.ALGORITHM_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + MLModel.OLD_MODEL_VERSION_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.MODEL_VERSION_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_GROUP_ID_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_FIELD - + "\" : {\"type\": \"binary\"},\n" - + " \"" - + MLModel.CHUNK_NUMBER_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.TOTAL_CHUNKS_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.MODEL_ID_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + MLModel.MODEL_FORMAT_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_STATE_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_SIZE_IN_BYTES_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.PLANNING_WORKER_NODE_COUNT_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.CURRENT_WORKER_NODE_COUNT_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.PLANNING_WORKER_NODES_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.DEPLOY_TO_ALL_NODES_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.IS_HIDDEN_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.MODEL_CONFIG_FIELD - + "\" : {\"properties\":{\"" - + MODEL_TYPE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + EMBEDDING_DIMENSION_FIELD - + "\":{\"type\":\"integer\"},\"" - + FRAMEWORK_TYPE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + POOLING_MODE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + NORMALIZE_RESULT_FIELD - + "\":{\"type\":\"boolean\"},\"" - + MODEL_MAX_LENGTH_FIELD - + "\":{\"type\":\"integer\"},\"" - + ALL_CONFIG_FIELD - + "\":{\"type\":\"text\"}}},\n" - + " \"" - + MLModel.DEPLOY_SETTING_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.IS_ENABLED_FIELD - + "\" : {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.IS_CONTROLLER_ENABLED_FIELD - + "\" : {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.RATE_LIMITER_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_HASH_VALUE_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.AUTO_REDEPLOY_RETRY_TIMES_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_REGISTERED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_DEPLOYED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_UNDEPLOYED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.INTERFACE_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.GUARDRAILS_FIELD - + "\" : {\n" - + " \"properties\": {\n" - + " \"input_guardrail\": {\n" - + " \"properties\": {\n" - + " \"regex\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"stop_words\": {\n" - + " \"properties\": {\n" - + " \"index_name\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"source_fields\": {\n" - + " \"type\": \"text\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " },\n" - + " \"output_guardrail\": {\n" - + " \"properties\": {\n" - + " \"regex\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"stop_words\": {\n" - + " \"properties\": {\n" - + " \"index_name\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"source_fields\": {\n" - + " \"type\": \"text\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModel.CONNECTOR_FIELD - + "\": {" - + ML_CONNECTOR_INDEX_FIELDS - + " }\n}," - + USER_FIELD_MAPPING - + " }\n" - + "}"; - - public static final String ML_TASK_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_TASK_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLTask.MODEL_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.TASK_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.FUNCTION_NAME_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.STATE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.INPUT_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.PROGRESS_FIELD - + "\": {\"type\": \"float\"},\n" - + " \"" - + MLTask.OUTPUT_INDEX_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.WORKER_NODE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLTask.LAST_UPDATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLTask.ERROR_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + MLTask.IS_ASYNC_TASK_FIELD - + "\" : {\"type\" : \"boolean\"}, \n" - + " \"" - + MLTask.REMOTE_JOB_FIELD - + "\" : {\"type\": \"flat_object\"}, \n" - + USER_FIELD_MAPPING - + " }\n" - + "}"; - - public static final String ML_CONNECTOR_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONNECTOR_SCHEMA_VERSION - + "},\n" - + ML_CONNECTOR_INDEX_FIELDS - + ",\n" - + " \"" - + MLModelGroup.BACKEND_ROLES_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.ACCESS - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.OWNER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " },\n" - + " \"" - + AbstractConnector.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + AbstractConnector.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONFIG_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONFIG_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MASTER_KEY - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + CONFIG_TYPE_FIELD - + "\" : {\"type\":\"keyword\"},\n" - + " \"" - + ML_CONFIGURATION_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONTROLLER_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONTROLLER_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLController.USER_RATE_LIMITER - + "\" : {\"type\": \"flat_object\"}\n" - + " }\n" - + "}"; - - public static final String ML_AGENT_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_AGENT_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLAgent.AGENT_NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + MLAgent.AGENT_TYPE_FIELD - + "\" : {\"type\":\"keyword\"},\n" - + " \"" - + MLAgent.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + MLAgent.LLM_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.TOOLS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.PARAMETERS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.MEMORY_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.IS_HIDDEN_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLAgent.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLAgent.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - public static final String ML_MEMORY_META_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + META_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + META_NAME_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + META_CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + META_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + USER_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + APPLICATION_TYPE_FIELD - + "\": {\"type\": \"keyword\"}\n" - + " }\n" - + "}"; + // Index mapping paths + public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "mappings/ml-model-group.json"; + public static final String ML_MODEL_INDEX_MAPPING_PATH = "mappings/ml-model.json"; + public static final String ML_TASK_INDEX_MAPPING_PATH = "mappings/ml-task.json"; + public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "mappings/ml-connector.json"; + public static final String ML_CONFIG_INDEX_MAPPING_PATH = "mappings/ml-config.json"; + public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "mappings/ml-controller.json"; + public static final String ML_AGENT_INDEX_MAPPING_PATH = "mappings/ml-agent.json"; + public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "mappings/ml-memory-meta.json"; + public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "mappings/ml-memory-message.json"; - public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + INTERACTIONS_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + INTERACTIONS_CONVERSATION_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + INTERACTIONS_INPUT_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_PROMPT_TEMPLATE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_RESPONSE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_ORIGIN_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + PARENT_INTERACTIONS_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_TRACE_NUMBER_FIELD - + "\": {\"type\": \"long\"}\n" - + " }\n" - + "}"; // Calculate Versions independently of OpenSearch core version public static final Version VERSION_2_11_0 = Version.fromString("2.11.0"); public static final Version VERSION_2_12_0 = Version.fromString("2.12.0"); diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 298bd3ec96..04d539ec8d 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -5,8 +5,14 @@ package org.opensearch.ml.common.utils; +import java.io.IOException; +import java.net.URL; import java.util.Map; +import com.fasterxml.jackson.core.JsonParseException; +import com.google.common.base.Charsets; +import com.google.common.io.Resources; + import lombok.extern.log4j.Log4j2; @Log4j2 @@ -32,4 +38,18 @@ public class IndexUtils { // Note: This does not include static settings like number of shards, which can't be changed after index creation. public static final Map UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1"); public static final Map UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all"); + + public static String getMappingFromFile(String path) throws IOException { + URL url = IndexUtils.class.getClassLoader().getResource(path); + if (url == null) { + throw new IOException("Resource not found: " + path); + } + + String mapping = Resources.toString(url, Charsets.UTF_8); + if (!StringUtils.isJson(mapping)) { + throw new JsonParseException("Mapping is not a valid JSON: " + path); + } + + return mapping; + } } diff --git a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java index 37bfac6f3f..81e93580a0 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java @@ -26,6 +26,7 @@ import com.google.gson.Gson; import com.google.gson.JsonElement; +import com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.google.gson.JsonSyntaxException; import com.jayway.jsonpath.JsonPath; @@ -319,4 +320,8 @@ public static boolean isValidJSONPath(String input) { } } + public static JsonObject getJsonObjectFromString(String jsonString) { + return JsonParser.parseString(jsonString).getAsJsonObject(); + } + } diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java index e6a15ecdae..82beb87f94 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java +++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java @@ -8,7 +8,6 @@ import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING; import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX; import static org.opensearch.ml.common.MLModel.MODEL_STATE_FIELD; @@ -69,6 +68,7 @@ import org.opensearch.ml.common.transport.task.MLTaskGetResponse; import org.opensearch.ml.engine.algorithms.DLModelExecute; import org.opensearch.ml.engine.annotation.Function; +import org.opensearch.ml.engine.indices.MLIndex; import org.opensearch.search.builder.SearchSourceBuilder; import com.google.common.annotations.VisibleForTesting; @@ -131,7 +131,7 @@ public void execute(Input input, ActionListener Date: Wed, 23 Oct 2024 16:05:30 -0700 Subject: [PATCH 02/29] refactor: changing exception being thrown Signed-off-by: Pavan Yekbote --- .../main/java/org/opensearch/ml/common/utils/IndexUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 04d539ec8d..f3e5836a6f 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -9,10 +9,10 @@ import java.net.URL; import java.util.Map; -import com.fasterxml.jackson.core.JsonParseException; import com.google.common.base.Charsets; import com.google.common.io.Resources; +import com.google.gson.JsonSyntaxException; import lombok.extern.log4j.Log4j2; @Log4j2 @@ -47,7 +47,7 @@ public static String getMappingFromFile(String path) throws IOException { String mapping = Resources.toString(url, Charsets.UTF_8); if (!StringUtils.isJson(mapping)) { - throw new JsonParseException("Mapping is not a valid JSON: " + path); + throw new JsonSyntaxException("Mapping is not a valid JSON: " + path); } return mapping; From abd4b2587ac2303fffad804f1c6963e3c719bb0d Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Wed, 23 Oct 2024 16:08:57 -0700 Subject: [PATCH 03/29] chore: remove unused file Signed-off-by: Pavan Yekbote --- ml-algorithms/src/main/resources/mappings/ml-stop-words.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 ml-algorithms/src/main/resources/mappings/ml-stop-words.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-stop-words.json b/ml-algorithms/src/main/resources/mappings/ml-stop-words.json deleted file mode 100644 index e69de29bb2..0000000000 From d2f3c0e693efa5a14c7d159dfccf691dedc894ed Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Wed, 23 Oct 2024 16:11:56 -0700 Subject: [PATCH 04/29] chore: fix typo in comment Signed-off-by: Pavan Yekbote --- .../src/main/java/org/opensearch/ml/engine/indices/MLIndex.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java index 7fa798aee8..69d94439c7 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java +++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java @@ -61,7 +61,7 @@ private String getMapping(String mappingPath) { try { return IndexUtils.getMappingFromFile(mappingPath); } catch (IOException e) { - // Unchecked exception is throw since the method is being called within a constructor + // Unchecked exception is thrown since the method is being called within a constructor throw new UncheckedIOException("Failed to fetch index mapping from file: " + mappingPath, e); } } From fa81560d96e807afd08bf420b1cd1fdb49498149 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Wed, 23 Oct 2024 16:16:49 -0700 Subject: [PATCH 05/29] chore: adding new line at the end of files Signed-off-by: Pavan Yekbote --- ml-algorithms/src/main/resources/mappings/ml-agent.json | 2 +- ml-algorithms/src/main/resources/mappings/ml-config.json | 2 +- ml-algorithms/src/main/resources/mappings/ml-connector.json | 2 +- ml-algorithms/src/main/resources/mappings/ml-controller.json | 2 +- .../src/main/resources/mappings/ml-memory-message.json | 2 +- ml-algorithms/src/main/resources/mappings/ml-memory-meta.json | 2 +- ml-algorithms/src/main/resources/mappings/ml-model-group.json | 2 +- ml-algorithms/src/main/resources/mappings/ml-model.json | 2 +- ml-algorithms/src/main/resources/mappings/ml-task.json | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ml-algorithms/src/main/resources/mappings/ml-agent.json b/ml-algorithms/src/main/resources/mappings/ml-agent.json index 4bdefab5b2..2bcee6bc3b 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-agent.json +++ b/ml-algorithms/src/main/resources/mappings/ml-agent.json @@ -42,4 +42,4 @@ "format": "strict_date_time||epoch_millis" } } -} \ No newline at end of file +} diff --git a/ml-algorithms/src/main/resources/mappings/ml-config.json b/ml-algorithms/src/main/resources/mappings/ml-config.json index 43622d1cd6..6d36d8efb7 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-config.json +++ b/ml-algorithms/src/main/resources/mappings/ml-config.json @@ -21,4 +21,4 @@ "format": "strict_date_time||epoch_millis" } } -} \ No newline at end of file +} diff --git a/ml-algorithms/src/main/resources/mappings/ml-connector.json b/ml-algorithms/src/main/resources/mappings/ml-connector.json index ec1ac79447..4be168c4b9 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-connector.json +++ b/ml-algorithms/src/main/resources/mappings/ml-connector.json @@ -92,4 +92,4 @@ "format": "strict_date_time||epoch_millis" } } -} \ No newline at end of file +} diff --git a/ml-algorithms/src/main/resources/mappings/ml-controller.json b/ml-algorithms/src/main/resources/mappings/ml-controller.json index 2617aef4fe..6822fb19c5 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-controller.json +++ b/ml-algorithms/src/main/resources/mappings/ml-controller.json @@ -7,4 +7,4 @@ "type": "flat_object" } } -} \ No newline at end of file +} diff --git a/ml-algorithms/src/main/resources/mappings/ml-memory-message.json b/ml-algorithms/src/main/resources/mappings/ml-memory-message.json index 448ceb07a9..10b081aee1 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-memory-message.json +++ b/ml-algorithms/src/main/resources/mappings/ml-memory-message.json @@ -32,4 +32,4 @@ "type": "long" } } -} \ No newline at end of file +} diff --git a/ml-algorithms/src/main/resources/mappings/ml-memory-meta.json b/ml-algorithms/src/main/resources/mappings/ml-memory-meta.json index 0644328025..e091d478a5 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-memory-meta.json +++ b/ml-algorithms/src/main/resources/mappings/ml-memory-meta.json @@ -21,4 +21,4 @@ "type": "keyword" } } -} \ No newline at end of file +} diff --git a/ml-algorithms/src/main/resources/mappings/ml-model-group.json b/ml-algorithms/src/main/resources/mappings/ml-model-group.json index d02b599199..7e2437e534 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-model-group.json +++ b/ml-algorithms/src/main/resources/mappings/ml-model-group.json @@ -80,4 +80,4 @@ "format": "strict_date_time||epoch_millis" } } -} \ No newline at end of file +} diff --git a/ml-algorithms/src/main/resources/mappings/ml-model.json b/ml-algorithms/src/main/resources/mappings/ml-model.json index 8a064273ec..b996e463cd 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-model.json +++ b/ml-algorithms/src/main/resources/mappings/ml-model.json @@ -240,4 +240,4 @@ } } } -} \ No newline at end of file +} diff --git a/ml-algorithms/src/main/resources/mappings/ml-task.json b/ml-algorithms/src/main/resources/mappings/ml-task.json index c17bfd22c4..ad428724bf 100644 --- a/ml-algorithms/src/main/resources/mappings/ml-task.json +++ b/ml-algorithms/src/main/resources/mappings/ml-task.json @@ -83,4 +83,4 @@ } } } -} \ No newline at end of file +} From 63a6d62f48bca1b2d2502571fdab19d942ea5b53 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Wed, 23 Oct 2024 17:11:36 -0700 Subject: [PATCH 06/29] feat: add test cases Signed-off-by: Pavan Yekbote --- .../org/opensearch/ml/common/CommonValue.java | 2 +- .../ml/common/utils/IndexUtils.java | 18 ++- .../ml/common/utils/IndexUtilsTest.java | 110 ++++++++++++++++++ .../mappings/test-mapping-malformed.json | 13 +++ .../test/resources/mappings/test-mapping.json | 16 +++ .../opensearch/ml/engine/indices/MLIndex.java | 20 +--- 6 files changed, 158 insertions(+), 21 deletions(-) create mode 100644 common/src/test/resources/mappings/test-mapping-malformed.json create mode 100644 common/src/test/resources/mappings/test-mapping.json diff --git a/common/src/main/java/org/opensearch/ml/common/CommonValue.java b/common/src/main/java/org/opensearch/ml/common/CommonValue.java index 09081e4b49..7486d11f25 100644 --- a/common/src/main/java/org/opensearch/ml/common/CommonValue.java +++ b/common/src/main/java/org/opensearch/ml/common/CommonValue.java @@ -48,7 +48,7 @@ public class CommonValue { public static final String ML_MODEL_INDEX_MAPPING_PATH = "mappings/ml-model.json"; public static final String ML_TASK_INDEX_MAPPING_PATH = "mappings/ml-task.json"; public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "mappings/ml-connector.json"; - public static final String ML_CONFIG_INDEX_MAPPING_PATH = "mappings/ml-config.json"; + public static final String ML_CONFIG_INDEX_MAPPING_PATH = "mappings/ml-config-random.json"; public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "mappings/ml-controller.json"; public static final String ML_AGENT_INDEX_MAPPING_PATH = "mappings/ml-agent.json"; public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "mappings/ml-memory-meta.json"; diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index f3e5836a6f..bbe76fb838 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -11,8 +11,10 @@ import com.google.common.base.Charsets; import com.google.common.io.Resources; - +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; import com.google.gson.JsonSyntaxException; + import lombok.extern.log4j.Log4j2; @Log4j2 @@ -52,4 +54,18 @@ public static String getMappingFromFile(String path) throws IOException { return mapping; } + + public static Integer getVersionFromMapping(String mapping) { + JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping); + if (mappingJson == null || !mappingJson.has("_meta")) { + throw new JsonParseException("Failed to find \"_meta\" object in mapping: " + mapping); + } + + JsonObject metaObject = mappingJson.getAsJsonObject("_meta"); + if (metaObject == null || !metaObject.has("schema_version")) { + throw new JsonParseException("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping); + } + + return metaObject.get("schema_version").getAsInt(); + } } diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index 8cfad37c98..a11b2e200f 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -6,11 +6,16 @@ package org.opensearch.ml.common.utils; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import java.io.IOException; import java.util.Map; import org.junit.Test; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSyntaxException; + public class IndexUtilsTest { @Test @@ -42,4 +47,109 @@ public void testUpdatedAllNodesReplicaIndexSettingsContainsExpectedValues() { assertEquals("index.auto_expand_replicas should be 0-all", updatedIndexSettings.get("index.auto_expand_replicas"), "0-all"); assertEquals("INDEX_SETTINGS should contain exactly 1 settings", 1, updatedIndexSettings.size()); } + + @Test + public void testGetMappingFromFile() { + String expectedMapping = "{\n" + + " \"_meta\": {\n" + + " \"schema_version\": \"1\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + try { + String actualMapping = IndexUtils.getMappingFromFile("mappings/test-mapping.json"); + assertEquals(expectedMapping, actualMapping); + } catch (IOException e) { + throw new RuntimeException("Failed to read file at path: mappings/test-mapping.json"); + } + } + + @Test + public void testGetMappingFromFileFileNotFound() { + String path = "mappings/test-mapping-not-found.json"; + IOException e = assertThrows(IOException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Resource not found: " + path, e.getMessage()); + } + + @Test + public void testGetMappingFromFilesMalformedJson() { + String path = "mappings/test-mapping-malformed.json"; + JsonSyntaxException e = assertThrows(JsonSyntaxException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Mapping is not a valid JSON: " + path, e.getMessage()); + } + + @Test + public void testGetVersionFromMapping() { + Integer expectedVersion = 1; + String mapping = "{\n" + + " \"_meta\": {\n" + + " \"schema_version\": \"1\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + assertEquals(expectedVersion, IndexUtils.getVersionFromMapping(mapping)); + } + + @Test + public void testGetVersionFromMappingNoMeta() { + String mapping = "{\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); + assertEquals("Failed to find \"_meta\" object in mapping: " + mapping, e.getMessage()); + } + + @Test + public void testGetVersionFromMappingNoSchemaVersion() { + String mapping = "{\n" + + " \"_meta\": {\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); + assertEquals("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping, e.getMessage()); + } } diff --git a/common/src/test/resources/mappings/test-mapping-malformed.json b/common/src/test/resources/mappings/test-mapping-malformed.json new file mode 100644 index 0000000000..f87e98da9b --- /dev/null +++ b/common/src/test/resources/mappings/test-mapping-malformed.json @@ -0,0 +1,13 @@ +{ + "_meta": { + "schema_version": "1" + }, + "properties": { + "test_field_1": { + "type": "test_type_1" + }, + { + "malformed": } + } + } +} diff --git a/common/src/test/resources/mappings/test-mapping.json b/common/src/test/resources/mappings/test-mapping.json new file mode 100644 index 0000000000..6114de4687 --- /dev/null +++ b/common/src/test/resources/mappings/test-mapping.json @@ -0,0 +1,16 @@ +{ + "_meta": { + "schema_version": "1" + }, + "properties": { + "test_field_1": { + "type": "test_type_1" + }, + "test_field_2": { + "type": "test_type_2" + }, + "test_field_3": { + "type": "test_type_3" + } + } +} diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java index 69d94439c7..4332eafe2e 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java +++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java @@ -28,10 +28,6 @@ import java.io.UncheckedIOException; import org.opensearch.ml.common.utils.IndexUtils; -import org.opensearch.ml.common.utils.StringUtils; - -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; public enum MLIndex { MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING_PATH), @@ -54,7 +50,7 @@ public enum MLIndex { this.indexName = name; this.alias = alias; this.mapping = getMapping(mappingPath); - this.version = getVersionFromMapping(this.mapping); + this.version = IndexUtils.getVersionFromMapping(this.mapping); } private String getMapping(String mappingPath) { @@ -66,20 +62,6 @@ private String getMapping(String mappingPath) { } } - private Integer getVersionFromMapping(String mapping) { - JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping); - if (mappingJson == null || !mappingJson.has("_meta")) { - throw new JsonParseException("Failed to find \"_meta\" object in mapping: " + mapping); - } - - JsonObject metaObject = mappingJson.getAsJsonObject("_meta"); - if (metaObject == null || !metaObject.has("schema_version")) { - throw new JsonParseException("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping); - } - - return metaObject.get("schema_version").getAsInt(); - } - public String getIndexName() { return indexName; } From 8cbaf01aa175a49a40cef7b14f2eb6d0ae9a74b7 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Thu, 24 Oct 2024 10:48:33 -0700 Subject: [PATCH 07/29] fix: remove test code Signed-off-by: Pavan Yekbote --- common/src/main/java/org/opensearch/ml/common/CommonValue.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/main/java/org/opensearch/ml/common/CommonValue.java b/common/src/main/java/org/opensearch/ml/common/CommonValue.java index 7486d11f25..09081e4b49 100644 --- a/common/src/main/java/org/opensearch/ml/common/CommonValue.java +++ b/common/src/main/java/org/opensearch/ml/common/CommonValue.java @@ -48,7 +48,7 @@ public class CommonValue { public static final String ML_MODEL_INDEX_MAPPING_PATH = "mappings/ml-model.json"; public static final String ML_TASK_INDEX_MAPPING_PATH = "mappings/ml-task.json"; public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "mappings/ml-connector.json"; - public static final String ML_CONFIG_INDEX_MAPPING_PATH = "mappings/ml-config-random.json"; + public static final String ML_CONFIG_INDEX_MAPPING_PATH = "mappings/ml-config.json"; public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "mappings/ml-controller.json"; public static final String ML_AGENT_INDEX_MAPPING_PATH = "mappings/ml-agent.json"; public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "mappings/ml-memory-meta.json"; From 342302667768265f883afefb998d33a5a9cf05a7 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Thu, 24 Oct 2024 16:06:07 -0700 Subject: [PATCH 08/29] fix(test): in main the versions were not updated appropriately Signed-off-by: Pavan Yekbote --- .../org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-algorithms/src/test/java/org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java b/ml-algorithms/src/test/java/org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java index 021397fae0..2026a203b9 100644 --- a/ml-algorithms/src/test/java/org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java +++ b/ml-algorithms/src/test/java/org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java @@ -96,7 +96,7 @@ public void setUp() { when(agentindexMetadata.mapping()).thenReturn(agentmappingMetadata); when(memorymetaindexMetadata.mapping()).thenReturn(memorymappingMetadata); when(agentmappingMetadata.getSourceAsMap()).thenReturn(Map.of(META, Map.of(SCHEMA_VERSION_FIELD, Integer.valueOf(2)))); - when(memorymappingMetadata.getSourceAsMap()).thenReturn(Map.of(META, Map.of(SCHEMA_VERSION_FIELD, Integer.valueOf(1)))); + when(memorymappingMetadata.getSourceAsMap()).thenReturn(Map.of(META, Map.of(SCHEMA_VERSION_FIELD, Integer.valueOf(2)))); settings = Settings.builder().put("test_key", 10).build(); threadContext = new ThreadContext(settings); when(client.threadPool()).thenReturn(threadPool); From 67a1810ae0d53a41401395e724fffe494dcc464a Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Fri, 25 Oct 2024 11:45:53 -0700 Subject: [PATCH 09/29] refactor: move mapping templates under common module Signed-off-by: Pavan Yekbote --- .../src/main/resources/mappings/ml-agent.json | 0 .../src/main/resources/mappings/ml-config.json | 0 .../src/main/resources/mappings/ml-connector.json | 0 .../src/main/resources/mappings/ml-controller.json | 0 .../src/main/resources/mappings/ml-memory-message.json | 0 .../src/main/resources/mappings/ml-memory-meta.json | 0 .../src/main/resources/mappings/ml-model-group.json | 0 .../src/main/resources/mappings/ml-model.json | 0 .../src/main/resources/mappings/ml-task.json | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename {ml-algorithms => common}/src/main/resources/mappings/ml-agent.json (100%) rename {ml-algorithms => common}/src/main/resources/mappings/ml-config.json (100%) rename {ml-algorithms => common}/src/main/resources/mappings/ml-connector.json (100%) rename {ml-algorithms => common}/src/main/resources/mappings/ml-controller.json (100%) rename {ml-algorithms => common}/src/main/resources/mappings/ml-memory-message.json (100%) rename {ml-algorithms => common}/src/main/resources/mappings/ml-memory-meta.json (100%) rename {ml-algorithms => common}/src/main/resources/mappings/ml-model-group.json (100%) rename {ml-algorithms => common}/src/main/resources/mappings/ml-model.json (100%) rename {ml-algorithms => common}/src/main/resources/mappings/ml-task.json (100%) diff --git a/ml-algorithms/src/main/resources/mappings/ml-agent.json b/common/src/main/resources/mappings/ml-agent.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-agent.json rename to common/src/main/resources/mappings/ml-agent.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-config.json b/common/src/main/resources/mappings/ml-config.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-config.json rename to common/src/main/resources/mappings/ml-config.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-connector.json b/common/src/main/resources/mappings/ml-connector.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-connector.json rename to common/src/main/resources/mappings/ml-connector.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-controller.json b/common/src/main/resources/mappings/ml-controller.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-controller.json rename to common/src/main/resources/mappings/ml-controller.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-memory-message.json b/common/src/main/resources/mappings/ml-memory-message.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-memory-message.json rename to common/src/main/resources/mappings/ml-memory-message.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-memory-meta.json b/common/src/main/resources/mappings/ml-memory-meta.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-memory-meta.json rename to common/src/main/resources/mappings/ml-memory-meta.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-model-group.json b/common/src/main/resources/mappings/ml-model-group.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-model-group.json rename to common/src/main/resources/mappings/ml-model-group.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-model.json b/common/src/main/resources/mappings/ml-model.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-model.json rename to common/src/main/resources/mappings/ml-model.json diff --git a/ml-algorithms/src/main/resources/mappings/ml-task.json b/common/src/main/resources/mappings/ml-task.json similarity index 100% rename from ml-algorithms/src/main/resources/mappings/ml-task.json rename to common/src/main/resources/mappings/ml-task.json From 63bc70d7a4fd9ab6d15e771b424a6205067dd51d Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 29 Oct 2024 15:47:00 -0700 Subject: [PATCH 10/29] refactor: ensure that conversationindexconstants reference mlindex enums rather than use their own mappings Signed-off-by: Pavan Yekbote --- .../org/opensearch/ml/common}/MLIndex.java | 2 +- .../ConversationalIndexConstants.java | 75 ++----------------- .../resources/mappings/ml-memory-meta.json | 3 + .../MetricsCorrelation.java | 2 +- .../ml/engine/indices/MLIndicesHandler.java | 1 + 5 files changed, 12 insertions(+), 71 deletions(-) rename {ml-algorithms/src/main/java/org/opensearch/ml/engine/indices => common/src/main/java/org/opensearch/ml/common}/MLIndex.java (98%) diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java b/common/src/main/java/org/opensearch/ml/common/MLIndex.java similarity index 98% rename from ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java rename to common/src/main/java/org/opensearch/ml/common/MLIndex.java index 4332eafe2e..f85463abdd 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java +++ b/common/src/main/java/org/opensearch/ml/common/MLIndex.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.ml.engine.indices; +package org.opensearch.ml.common; import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX; import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING_PATH; diff --git a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java index ac639babb2..52f83946ae 100644 --- a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java +++ b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java @@ -18,15 +18,15 @@ package org.opensearch.ml.common.conversation; import org.opensearch.common.settings.Setting; +import org.opensearch.ml.common.MLIndex; /** * Class containing a bunch of constant defining how the conversational indices are formatted + * ToDo: use MLIndex.MEMORY_MESSAGE and MLIndex.MEMORY_META for index names and mappings */ public class ConversationalIndexConstants { - /** Version of the meta index schema */ - public final static Integer META_INDEX_SCHEMA_VERSION = 2; /** Name of the conversational metadata index */ - public final static String META_INDEX_NAME = ".plugins-ml-memory-meta"; + public final static String META_INDEX_NAME = MLIndex.MEMORY_META.getIndexName(); /** Name of the metadata field for initial timestamp */ public final static String META_CREATED_TIME_FIELD = "create_time"; /** Name of the metadata field for updated timestamp */ @@ -41,38 +41,10 @@ public class ConversationalIndexConstants { public final static String META_ADDITIONAL_INFO_FIELD = "additional_info"; /** Mappings for the conversational metadata index */ - public final static String META_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + META_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + META_NAME_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + META_CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + META_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + USER_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + APPLICATION_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + META_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"}\n" - + " }\n" - + "}"; + public final static String META_MAPPING = MLIndex.MEMORY_META.getMapping(); - /** Version of the interactions index schema */ - public final static Integer INTERACTIONS_INDEX_SCHEMA_VERSION = 1; /** Name of the conversational interactions index */ - public final static String INTERACTIONS_INDEX_NAME = ".plugins-ml-memory-message"; + public final static String INTERACTIONS_INDEX_NAME = MLIndex.MEMORY_MESSAGE.getIndexName(); /** Name of the interaction field for the conversation Id */ public final static String INTERACTIONS_CONVERSATION_ID_FIELD = "memory_id"; /** Name of the interaction field for the human input */ @@ -92,42 +64,7 @@ public class ConversationalIndexConstants { /** The trace number of an interaction */ public final static String INTERACTIONS_TRACE_NUMBER_FIELD = "trace_number"; /** Mappings for the interactions index */ - public final static String INTERACTIONS_MAPPINGS = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + INTERACTIONS_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + INTERACTIONS_CONVERSATION_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + INTERACTIONS_INPUT_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_PROMPT_TEMPLATE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_RESPONSE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_ORIGIN_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + PARENT_INTERACTIONS_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_TRACE_NUMBER_FIELD - + "\": {\"type\": \"long\"}\n" - + " }\n" - + "}"; + public final static String INTERACTIONS_MAPPINGS = MLIndex.MEMORY_MESSAGE.getMapping(); /** Feature Flag setting for conversational memory */ public static final Setting ML_COMMONS_MEMORY_FEATURE_ENABLED = Setting diff --git a/common/src/main/resources/mappings/ml-memory-meta.json b/common/src/main/resources/mappings/ml-memory-meta.json index e091d478a5..7684e25d06 100644 --- a/common/src/main/resources/mappings/ml-memory-meta.json +++ b/common/src/main/resources/mappings/ml-memory-meta.json @@ -19,6 +19,9 @@ }, "application_type": { "type": "keyword" + }, + "additional_info": { + "type": "flat_object" } } } diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java index 82beb87f94..9efe9372b8 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java +++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java @@ -39,6 +39,7 @@ import org.opensearch.ml.common.AccessMode; import org.opensearch.ml.common.CommonValue; import org.opensearch.ml.common.FunctionName; +import org.opensearch.ml.common.MLIndex; import org.opensearch.ml.common.MLModel; import org.opensearch.ml.common.MLModelGroup; import org.opensearch.ml.common.MLTask; @@ -68,7 +69,6 @@ import org.opensearch.ml.common.transport.task.MLTaskGetResponse; import org.opensearch.ml.engine.algorithms.DLModelExecute; import org.opensearch.ml.engine.annotation.Function; -import org.opensearch.ml.engine.indices.MLIndex; import org.opensearch.search.builder.SearchSourceBuilder; import com.google.common.annotations.VisibleForTesting; diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndicesHandler.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndicesHandler.java index d99eb4fa7b..c44846fa98 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndicesHandler.java +++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndicesHandler.java @@ -29,6 +29,7 @@ import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.action.ActionListener; import org.opensearch.ml.common.CommonValue; +import org.opensearch.ml.common.MLIndex; import org.opensearch.ml.common.exception.MLException; import lombok.AccessLevel; From 88673edf7cbe2e93281e426aa8cac60c0e97cf39 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 29 Oct 2024 15:51:06 -0700 Subject: [PATCH 11/29] refactor: update comment Signed-off-by: Pavan Yekbote --- .../ml/common/conversation/ConversationalIndexConstants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java index 52f83946ae..88f4920761 100644 --- a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java +++ b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java @@ -22,7 +22,7 @@ /** * Class containing a bunch of constant defining how the conversational indices are formatted - * ToDo: use MLIndex.MEMORY_MESSAGE and MLIndex.MEMORY_META for index names and mappings + * ToDo: use MLIndex.MEMORY_MESSAGE and MLIndex.MEMORY_META directly for index names and mappings rather than constants */ public class ConversationalIndexConstants { /** Name of the conversational metadata index */ From 409fbd98c1a1370c1660446d263be4b9170cce6c Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 29 Oct 2024 17:12:34 -0700 Subject: [PATCH 12/29] feat: add enhancements to validate index schema and allow using placeholders Signed-off-by: Pavan Yekbote --- common/build.gradle | 3 + .../ml/common/utils/IndexUtils.java | 58 ++++++++++++++- .../ml/common/utils/StringUtils.java | 31 ++++++++ .../src/main/resources/mappings/ml-model.json | 74 +------------------ .../src/main/resources/mappings/ml-task.json | 39 +--------- .../mappings/placeholders/connector.json | 34 +++++++++ .../resources/mappings/placeholders/user.json | 38 ++++++++++ .../src/main/resources/mappings/schema.json | 25 +++++++ .../ml/common/utils/StringUtilsTest.java | 20 +++++ 9 files changed, 210 insertions(+), 112 deletions(-) create mode 100644 common/src/main/resources/mappings/placeholders/connector.json create mode 100644 common/src/main/resources/mappings/placeholders/user.json create mode 100644 common/src/main/resources/mappings/schema.json diff --git a/common/build.gradle b/common/build.gradle index 60edb3101a..0f008f6c7b 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -35,6 +35,9 @@ dependencies { exclude group: 'com.google.guava', module: 'listenablefuture' } compileOnly 'com.jayway.jsonpath:json-path:2.9.0' + implementation("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}") + implementation("com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}") + implementation group: 'com.networknt' , name: 'json-schema-validator', version: '1.4.0' } lombok { diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index bbe76fb838..04c97312cc 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -5,6 +5,8 @@ package org.opensearch.ml.common.utils; +import static org.opensearch.ml.common.utils.StringUtils.validateSchema; + import java.io.IOException; import java.net.URL; import java.util.Map; @@ -41,6 +43,15 @@ public class IndexUtils { public static final Map UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1"); public static final Map UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all"); + // Schema that validates system index mappings + public static final String MAPPING_SCHEMA_PATH = "mappings/schema.json"; + + // Placeholders to use within the json mapping files + private static final String USER_PLACEHOLDER = "USER_MAPPING_PLACEHOLDER"; + private static final String CONNECTOR_PLACEHOLDER = "CONNECTOR_MAPPING_PLACEHOLDER"; + public static final Map MAPPING_PLACEHOLDERS = Map + .of(USER_PLACEHOLDER, "mappings/placeholders/user.json", CONNECTOR_PLACEHOLDER, "mappings/placeholders/connector.json"); + public static String getMappingFromFile(String path) throws IOException { URL url = IndexUtils.class.getClassLoader().getResource(path); if (url == null) { @@ -48,13 +59,56 @@ public static String getMappingFromFile(String path) throws IOException { } String mapping = Resources.toString(url, Charsets.UTF_8); - if (!StringUtils.isJson(mapping)) { - throw new JsonSyntaxException("Mapping is not a valid JSON: " + path); + mapping = replacePlaceholders(mapping); + validateMapping(mapping); + + return mapping; + } + + public static String replacePlaceholders(String mapping) throws IOException { + for (Map.Entry placeholder : MAPPING_PLACEHOLDERS.entrySet()) { + URL url = IndexUtils.class.getClassLoader().getResource(placeholder.getValue()); + if (url == null) { + throw new IOException("Resource not found: " + placeholder.getValue()); + } + + String placeholderMapping = Resources.toString(url, Charsets.UTF_8); + mapping = mapping.replace(placeholder.getKey(), placeholderMapping); } return mapping; } + /* + - Checks if mapping is a valid json + - Validates mapping against a schema found in mappings/schema.json + - Schema validates the following: + - Below fields are present: + - "_meta" + - "_meta.schema_version" + - "properties" + - No additional fields at root level + - No additional fields in "_meta" object + - "properties" is an object type + - "_meta" is an object type + - "_meta_.schema_version" provided type is integer + + Note: validation can be further restricted if the schema for each index is well-defined + */ + public static void validateMapping(String mapping) throws IOException { + if (!StringUtils.isJson(mapping)) { + throw new JsonSyntaxException("Mapping is not a valid JSON: " + mapping); + } + + URL url = IndexUtils.class.getClassLoader().getResource(MAPPING_SCHEMA_PATH); + if (url == null) { + throw new IOException("Resource not found: " + MAPPING_SCHEMA_PATH); + } + + String schema = Resources.toString(url, Charsets.UTF_8); + validateSchema(schema, mapping); + } + public static Integer getVersionFromMapping(String mapping) { JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping); if (mappingJson == null || !mappingJson.has("_meta")) { diff --git a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java index 81e93580a0..b06274078c 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java @@ -5,12 +5,14 @@ package org.opensearch.ml.common.utils; +import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -23,13 +25,20 @@ import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; +import org.opensearch.OpenSearchParseException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.google.gson.JsonSyntaxException; import com.jayway.jsonpath.JsonPath; +import com.networknt.schema.JsonSchema; +import com.networknt.schema.JsonSchemaFactory; +import com.networknt.schema.SpecVersion; +import com.networknt.schema.ValidationMessage; import lombok.extern.log4j.Log4j2; @@ -324,4 +333,26 @@ public static JsonObject getJsonObjectFromString(String jsonString) { return JsonParser.parseString(jsonString).getAsJsonObject(); } + public static void validateSchema(String schemaString, String instanceString) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + // parse the schema JSON as string + JsonNode schemaNode = mapper.readTree(schemaString); + JsonSchema schema = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012).getSchema(schemaNode); + + // JSON data to validate + JsonNode jsonNode = mapper.readTree(instanceString); + + // Validate JSON node against the schema + Set errors = schema.validate(jsonNode); + if (!errors.isEmpty()) { + throw new OpenSearchParseException( + "Validation failed: " + + Arrays.toString(errors.toArray(new ValidationMessage[0])) + + " for instance: " + + instanceString + + " with schema: " + + schemaString + ); + } + } } diff --git a/common/src/main/resources/mappings/ml-model.json b/common/src/main/resources/mappings/ml-model.json index b996e463cd..e9b6c4fba4 100644 --- a/common/src/main/resources/mappings/ml-model.json +++ b/common/src/main/resources/mappings/ml-model.json @@ -167,77 +167,7 @@ } } }, - "connector": { - "properties": { - "name": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "version": { - "type": "keyword" - }, - "description": { - "type": "text" - }, - "protocol": { - "type": "keyword" - }, - "parameters": { - "type": "flat_object" - }, - "credential": { - "type": "flat_object" - }, - "client_config": { - "type": "flat_object" - }, - "actions": { - "type": "flat_object" - } - } - }, - "user": { - "type": "nested", - "properties": { - "name": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "backend_roles": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "roles": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "custom_attribute_names": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } - } - } - } + "connector": CONNECTOR_MAPPING_PLACEHOLDER, + "user": USER_MAPPING_PLACEHOLDER } } diff --git a/common/src/main/resources/mappings/ml-task.json b/common/src/main/resources/mappings/ml-task.json index ad428724bf..87ff73763a 100644 --- a/common/src/main/resources/mappings/ml-task.json +++ b/common/src/main/resources/mappings/ml-task.json @@ -44,43 +44,6 @@ "remote_job": { "type": "flat_object" }, - "user": { - "type": "nested", - "properties": { - "name": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "backend_roles": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "roles": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "custom_attribute_names": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } - } - } - } + "user": USER_MAPPING_PLACEHOLDER } } diff --git a/common/src/main/resources/mappings/placeholders/connector.json b/common/src/main/resources/mappings/placeholders/connector.json new file mode 100644 index 0000000000..3955e886e5 --- /dev/null +++ b/common/src/main/resources/mappings/placeholders/connector.json @@ -0,0 +1,34 @@ +{ + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "protocol": { + "type": "keyword" + }, + "parameters": { + "type": "flat_object" + }, + "credential": { + "type": "flat_object" + }, + "client_config": { + "type": "flat_object" + }, + "actions": { + "type": "flat_object" + } + } +} diff --git a/common/src/main/resources/mappings/placeholders/user.json b/common/src/main/resources/mappings/placeholders/user.json new file mode 100644 index 0000000000..b3d2ed68a5 --- /dev/null +++ b/common/src/main/resources/mappings/placeholders/user.json @@ -0,0 +1,38 @@ +{ + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } +} diff --git a/common/src/main/resources/mappings/schema.json b/common/src/main/resources/mappings/schema.json new file mode 100644 index 0000000000..b8e326bb38 --- /dev/null +++ b/common/src/main/resources/mappings/schema.json @@ -0,0 +1,25 @@ +{ + "type": "object", + "properties": { + "_meta": { + "type": "object", + "properties": { + "schema_version": { + "type": "integer" + } + }, + "required": [ + "schema_version" + ], + "additionalProperties": false + }, + "properties": { + "type": "object" + } + }, + "required": [ + "_meta", + "properties" + ], + "additionalProperties": false +} diff --git a/common/src/test/java/org/opensearch/ml/common/utils/StringUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/StringUtilsTest.java index ef5307ae46..d440c44faf 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/StringUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/StringUtilsTest.java @@ -6,6 +6,7 @@ package org.opensearch.ml.common.utils; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; import static org.opensearch.ml.common.utils.StringUtils.TO_STRING_FUNCTION_NAME; import static org.opensearch.ml.common.utils.StringUtils.collectToStringPrefixes; import static org.opensearch.ml.common.utils.StringUtils.getJsonPath; @@ -14,6 +15,7 @@ import static org.opensearch.ml.common.utils.StringUtils.parseParameters; import static org.opensearch.ml.common.utils.StringUtils.toJson; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -25,6 +27,7 @@ import org.apache.commons.text.StringSubstitutor; import org.junit.Assert; import org.junit.Test; +import org.opensearch.OpenSearchParseException; public class StringUtilsTest { @@ -507,4 +510,21 @@ public void testisValidJSONPath_WithFilter() { Assert.assertTrue(isValidJSONPath("$['store','warehouse']")); Assert.assertTrue(isValidJSONPath("$..book[?(@.price > 20)].title")); } + + @Test + public void testValidateSchema() throws IOException { + String schema = "{" + + "\"type\": \"object\"," + + "\"properties\": {" + + " \"key1\": {\"type\": \"string\"}," + + " \"key2\": {\"type\": \"integer\"}" + + "}," + + "\"required\": [\"key1\", \"key2\"]" + + "}"; + String json = "{\"key1\": \"foo\", \"key2\": 123}"; + StringUtils.validateSchema(schema, json); + + String json2 = "{\"key1\": \"foo\"}"; + assertThrows(OpenSearchParseException.class, () -> StringUtils.validateSchema(schema, json2)); + } } From af4d1d05d173371fffb627bd927fedc3643d17e3 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 29 Oct 2024 17:19:21 -0700 Subject: [PATCH 13/29] refactor: modifying comment Signed-off-by: Pavan Yekbote --- .../main/java/org/opensearch/ml/common/utils/IndexUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 04c97312cc..fe9c2cd626 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -93,7 +93,7 @@ public static String replacePlaceholders(String mapping) throws IOException { - "_meta" is an object type - "_meta_.schema_version" provided type is integer - Note: validation can be further restricted if the schema for each index is well-defined + Note: Validation can be made more strict if a specific schema is defined for each index. */ public static void validateMapping(String mapping) throws IOException { if (!StringUtils.isJson(mapping)) { From b383b48d532c75fbd11e60efe4ce7a6afa5f95d1 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 29 Oct 2024 17:26:35 -0700 Subject: [PATCH 14/29] test: adding testcase for MLIndex to catch failures before runtime Signed-off-by: Pavan Yekbote --- common/src/test/java/MLIndexTest.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 common/src/test/java/MLIndexTest.java diff --git a/common/src/test/java/MLIndexTest.java b/common/src/test/java/MLIndexTest.java new file mode 100644 index 0000000000..df949d6cb1 --- /dev/null +++ b/common/src/test/java/MLIndexTest.java @@ -0,0 +1,17 @@ +import org.junit.Test; +import org.opensearch.ml.common.MLIndex; + +public class MLIndexTest { + + /** + * Mappings are initialised during runtime when the MLIndex enum is referenced. + * We want to catch any failure in mapping assignment before runtime. + * This test simply references the enums to fetch the mapping. It will fail in case the enum is not initialized. + **/ + @Test + public void testValidateMappingsForSystemIndices() { + for (MLIndex index : MLIndex.values()) { + String mapping = index.getMapping(); + } + } +} From 56d945b0f52246b6643d233bb0a77ec26b8221e8 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Thu, 31 Oct 2024 14:51:23 -0700 Subject: [PATCH 15/29] refactor: rename dir from mappings to index-mappings Signed-off-by: Pavan Yekbote --- .../org/opensearch/ml/common/CommonValue.java | 18 +++++++++--------- .../{mappings => index-mappings}/ml-agent.json | 0 .../ml-config.json | 0 .../ml-connector.json | 0 .../ml-controller.json | 0 .../ml-memory-message.json | 0 .../ml-memory-meta.json | 0 .../ml-model-group.json | 0 .../{mappings => index-mappings}/ml-model.json | 0 .../{mappings => index-mappings}/ml-task.json | 0 .../ml/common/utils/IndexUtilsTest.java | 8 ++++---- .../test-mapping-malformed.json | 0 .../test-mapping.json | 0 13 files changed, 13 insertions(+), 13 deletions(-) rename common/src/main/resources/{mappings => index-mappings}/ml-agent.json (100%) rename common/src/main/resources/{mappings => index-mappings}/ml-config.json (100%) rename common/src/main/resources/{mappings => index-mappings}/ml-connector.json (100%) rename common/src/main/resources/{mappings => index-mappings}/ml-controller.json (100%) rename common/src/main/resources/{mappings => index-mappings}/ml-memory-message.json (100%) rename common/src/main/resources/{mappings => index-mappings}/ml-memory-meta.json (100%) rename common/src/main/resources/{mappings => index-mappings}/ml-model-group.json (100%) rename common/src/main/resources/{mappings => index-mappings}/ml-model.json (100%) rename common/src/main/resources/{mappings => index-mappings}/ml-task.json (100%) rename common/src/test/resources/{mappings => index-mappings}/test-mapping-malformed.json (100%) rename common/src/test/resources/{mappings => index-mappings}/test-mapping.json (100%) diff --git a/common/src/main/java/org/opensearch/ml/common/CommonValue.java b/common/src/main/java/org/opensearch/ml/common/CommonValue.java index 09081e4b49..e06e552536 100644 --- a/common/src/main/java/org/opensearch/ml/common/CommonValue.java +++ b/common/src/main/java/org/opensearch/ml/common/CommonValue.java @@ -44,15 +44,15 @@ public class CommonValue { public static final Set stopWordsIndices = ImmutableSet.of(".plugins-ml-stop-words"); // Index mapping paths - public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "mappings/ml-model-group.json"; - public static final String ML_MODEL_INDEX_MAPPING_PATH = "mappings/ml-model.json"; - public static final String ML_TASK_INDEX_MAPPING_PATH = "mappings/ml-task.json"; - public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "mappings/ml-connector.json"; - public static final String ML_CONFIG_INDEX_MAPPING_PATH = "mappings/ml-config.json"; - public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "mappings/ml-controller.json"; - public static final String ML_AGENT_INDEX_MAPPING_PATH = "mappings/ml-agent.json"; - public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "mappings/ml-memory-meta.json"; - public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "mappings/ml-memory-message.json"; + public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml-model-group.json"; + public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml-model.json"; + public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml-task.json"; + public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml-connector.json"; + public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml-config.json"; + public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml-controller.json"; + public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml-agent.json"; + public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml-memory-meta.json"; + public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml-memory-message.json"; // Calculate Versions independently of OpenSearch core version public static final Version VERSION_2_11_0 = Version.fromString("2.11.0"); diff --git a/common/src/main/resources/mappings/ml-agent.json b/common/src/main/resources/index-mappings/ml-agent.json similarity index 100% rename from common/src/main/resources/mappings/ml-agent.json rename to common/src/main/resources/index-mappings/ml-agent.json diff --git a/common/src/main/resources/mappings/ml-config.json b/common/src/main/resources/index-mappings/ml-config.json similarity index 100% rename from common/src/main/resources/mappings/ml-config.json rename to common/src/main/resources/index-mappings/ml-config.json diff --git a/common/src/main/resources/mappings/ml-connector.json b/common/src/main/resources/index-mappings/ml-connector.json similarity index 100% rename from common/src/main/resources/mappings/ml-connector.json rename to common/src/main/resources/index-mappings/ml-connector.json diff --git a/common/src/main/resources/mappings/ml-controller.json b/common/src/main/resources/index-mappings/ml-controller.json similarity index 100% rename from common/src/main/resources/mappings/ml-controller.json rename to common/src/main/resources/index-mappings/ml-controller.json diff --git a/common/src/main/resources/mappings/ml-memory-message.json b/common/src/main/resources/index-mappings/ml-memory-message.json similarity index 100% rename from common/src/main/resources/mappings/ml-memory-message.json rename to common/src/main/resources/index-mappings/ml-memory-message.json diff --git a/common/src/main/resources/mappings/ml-memory-meta.json b/common/src/main/resources/index-mappings/ml-memory-meta.json similarity index 100% rename from common/src/main/resources/mappings/ml-memory-meta.json rename to common/src/main/resources/index-mappings/ml-memory-meta.json diff --git a/common/src/main/resources/mappings/ml-model-group.json b/common/src/main/resources/index-mappings/ml-model-group.json similarity index 100% rename from common/src/main/resources/mappings/ml-model-group.json rename to common/src/main/resources/index-mappings/ml-model-group.json diff --git a/common/src/main/resources/mappings/ml-model.json b/common/src/main/resources/index-mappings/ml-model.json similarity index 100% rename from common/src/main/resources/mappings/ml-model.json rename to common/src/main/resources/index-mappings/ml-model.json diff --git a/common/src/main/resources/mappings/ml-task.json b/common/src/main/resources/index-mappings/ml-task.json similarity index 100% rename from common/src/main/resources/mappings/ml-task.json rename to common/src/main/resources/index-mappings/ml-task.json diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index a11b2e200f..89ab31c51b 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -67,23 +67,23 @@ public void testGetMappingFromFile() { + " }\n" + "}\n"; try { - String actualMapping = IndexUtils.getMappingFromFile("mappings/test-mapping.json"); + String actualMapping = IndexUtils.getMappingFromFile("index-mappings/test-mapping.json"); assertEquals(expectedMapping, actualMapping); } catch (IOException e) { - throw new RuntimeException("Failed to read file at path: mappings/test-mapping.json"); + throw new RuntimeException("Failed to read file at path: index-mappings/test-mapping.json"); } } @Test public void testGetMappingFromFileFileNotFound() { - String path = "mappings/test-mapping-not-found.json"; + String path = "index-mappings/test-mapping-not-found.json"; IOException e = assertThrows(IOException.class, () -> IndexUtils.getMappingFromFile(path)); assertEquals("Resource not found: " + path, e.getMessage()); } @Test public void testGetMappingFromFilesMalformedJson() { - String path = "mappings/test-mapping-malformed.json"; + String path = "index-mappings/test-mapping-malformed.json"; JsonSyntaxException e = assertThrows(JsonSyntaxException.class, () -> IndexUtils.getMappingFromFile(path)); assertEquals("Mapping is not a valid JSON: " + path, e.getMessage()); } diff --git a/common/src/test/resources/mappings/test-mapping-malformed.json b/common/src/test/resources/index-mappings/test-mapping-malformed.json similarity index 100% rename from common/src/test/resources/mappings/test-mapping-malformed.json rename to common/src/test/resources/index-mappings/test-mapping-malformed.json diff --git a/common/src/test/resources/mappings/test-mapping.json b/common/src/test/resources/index-mappings/test-mapping.json similarity index 100% rename from common/src/test/resources/mappings/test-mapping.json rename to common/src/test/resources/index-mappings/test-mapping.json From 4ea99eacdebf1e0333081d1b7095716c24b47d2f Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Thu, 31 Oct 2024 15:05:42 -0700 Subject: [PATCH 16/29] fix: add null checks Signed-off-by: Pavan Yekbote --- .../opensearch/ml/common/utils/IndexUtils.java | 8 ++++++++ .../ml/common/utils/StringUtils.java | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index bbe76fb838..f42e792759 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -48,6 +48,10 @@ public static String getMappingFromFile(String path) throws IOException { } String mapping = Resources.toString(url, Charsets.UTF_8); + if (mapping == null || mapping.isBlank()) { + throw new IllegalArgumentException("Empty mapping found at: " + path); + } + if (!StringUtils.isJson(mapping)) { throw new JsonSyntaxException("Mapping is not a valid JSON: " + path); } @@ -56,6 +60,10 @@ public static String getMappingFromFile(String path) throws IOException { } public static Integer getVersionFromMapping(String mapping) { + if (mapping == null || mapping.isBlank()) { + throw new IllegalArgumentException("Mapping cannot be null or empty"); + } + JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping); if (mappingJson == null || !mappingJson.has("_meta")) { throw new JsonParseException("Failed to find \"_meta\" object in mapping: " + mapping); diff --git a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java index 81e93580a0..fcc0c4c3c9 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java @@ -54,12 +54,16 @@ public class StringUtils { } public static final String TO_STRING_FUNCTION_NAME = ".toString()"; - public static boolean isValidJsonString(String Json) { + public static boolean isValidJsonString(String json) { + if (json == null || json.isBlank()) { + return false; + } + try { - new JSONObject(Json); + new JSONObject(json); } catch (JSONException ex) { try { - new JSONArray(Json); + new JSONArray(json); } catch (JSONException ex1) { return false; } @@ -68,6 +72,10 @@ public static boolean isValidJsonString(String Json) { } public static boolean isJson(String json) { + if (json == null || json.isBlank()) { + return false; + } + try { if (!isValidJsonString(json)) { return false; @@ -321,6 +329,10 @@ public static boolean isValidJSONPath(String input) { } public static JsonObject getJsonObjectFromString(String jsonString) { + if (jsonString == null || jsonString.isBlank()) { + throw new IllegalArgumentException("Json cannot be null or empty"); + } + return JsonParser.parseString(jsonString).getAsJsonObject(); } From ba9d2323099c1fbea500d613ad677eee3261e656 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Thu, 31 Oct 2024 15:17:45 -0700 Subject: [PATCH 17/29] fix: modify mappin paths for placeholders Signed-off-by: Pavan Yekbote --- .../org/opensearch/ml/common/utils/IndexUtils.java | 4 ++-- common/src/test/java/MLIndexTest.java | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 8ae866b92c..de72b4b33e 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -44,13 +44,13 @@ public class IndexUtils { public static final Map UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all"); // Schema that validates system index mappings - public static final String MAPPING_SCHEMA_PATH = "mappings/schema.json"; + public static final String MAPPING_SCHEMA_PATH = "index-mappings/schema.json"; // Placeholders to use within the json mapping files private static final String USER_PLACEHOLDER = "USER_MAPPING_PLACEHOLDER"; private static final String CONNECTOR_PLACEHOLDER = "CONNECTOR_MAPPING_PLACEHOLDER"; public static final Map MAPPING_PLACEHOLDERS = Map - .of(USER_PLACEHOLDER, "mappings/placeholders/user.json", CONNECTOR_PLACEHOLDER, "mappings/placeholders/connector.json"); + .of(USER_PLACEHOLDER, "index-mappings/placeholders/user.json", CONNECTOR_PLACEHOLDER, "index-mappings/placeholders/connector.json"); public static String getMappingFromFile(String path) throws IOException { URL url = IndexUtils.class.getClassLoader().getResource(path); diff --git a/common/src/test/java/MLIndexTest.java b/common/src/test/java/MLIndexTest.java index df949d6cb1..c0fa050cd7 100644 --- a/common/src/test/java/MLIndexTest.java +++ b/common/src/test/java/MLIndexTest.java @@ -8,10 +8,10 @@ public class MLIndexTest { * We want to catch any failure in mapping assignment before runtime. * This test simply references the enums to fetch the mapping. It will fail in case the enum is not initialized. **/ - @Test - public void testValidateMappingsForSystemIndices() { - for (MLIndex index : MLIndex.values()) { - String mapping = index.getMapping(); - } - } + @Test + public void testValidateMappingsForSystemIndices() { + for (MLIndex index : MLIndex.values()) { + String mapping = index.getMapping(); + } + } } From ce72e53035f9941b20f0369e638a18f2eee5479b Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Fri, 1 Nov 2024 17:20:59 -0700 Subject: [PATCH 18/29] fix: adding dependencies for testing Signed-off-by: Pavan Yekbote --- common/build.gradle | 1 + memory/build.gradle | 1 + 2 files changed, 2 insertions(+) diff --git a/common/build.gradle b/common/build.gradle index 60edb3101a..979752bc05 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -26,6 +26,7 @@ dependencies { compileOnly group: 'org.apache.commons', name: 'commons-text', version: '1.10.0' compileOnly group: 'com.google.code.gson', name: 'gson', version: '2.10.1' compileOnly group: 'org.json', name: 'json', version: '20231013' + testImplementation group: 'org.json', name: 'json', version: '20231013' implementation('com.google.guava:guava:32.1.2-jre') { exclude group: 'com.google.guava', module: 'failureaccess' exclude group: 'com.google.code.findbugs', module: 'jsr305' diff --git a/memory/build.gradle b/memory/build.gradle index b6198509d0..8251303158 100644 --- a/memory/build.gradle +++ b/memory/build.gradle @@ -37,6 +37,7 @@ dependencies { testImplementation "org.opensearch.test:framework:${opensearch_version}" testImplementation "org.opensearch.client:opensearch-rest-client:${opensearch_version}" testImplementation group: 'com.google.code.gson', name: 'gson', version: '2.10.1' + testImplementation group: 'org.json', name: 'json', version: '20231013' } test { From e714c35f98ca9c88f966eaa1b117301c475653c0 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Mon, 4 Nov 2024 16:04:01 -0800 Subject: [PATCH 19/29] fix(test): compare json object rather than strings to avoid eol character issue Signed-off-by: Pavan Yekbote --- .../java/org/opensearch/ml/common/utils/IndexUtilsTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index 89ab31c51b..f1776b670a 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -68,7 +68,8 @@ public void testGetMappingFromFile() { + "}\n"; try { String actualMapping = IndexUtils.getMappingFromFile("index-mappings/test-mapping.json"); - assertEquals(expectedMapping, actualMapping); + // comparing JsonObjects to avoid issues caused by eol character in different OS + assertEquals(StringUtils.getJsonObjectFromString(expectedMapping), StringUtils.getJsonObjectFromString(actualMapping)); } catch (IOException e) { throw new RuntimeException("Failed to read file at path: index-mappings/test-mapping.json"); } From 86d2b76b6ebee31430b990491f4ac73a163b6754 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Mon, 25 Nov 2024 18:04:31 -0800 Subject: [PATCH 20/29] refactor: combine if statements into single check Signed-off-by: Pavan Yekbote --- .../org/opensearch/ml/common/utils/IndexUtils.java | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index f42e792759..643f85968e 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -13,7 +13,6 @@ import com.google.common.io.Resources; import com.google.gson.JsonObject; import com.google.gson.JsonParseException; -import com.google.gson.JsonSyntaxException; import lombok.extern.log4j.Log4j2; @@ -47,13 +46,9 @@ public static String getMappingFromFile(String path) throws IOException { throw new IOException("Resource not found: " + path); } - String mapping = Resources.toString(url, Charsets.UTF_8); - if (mapping == null || mapping.isBlank()) { - throw new IllegalArgumentException("Empty mapping found at: " + path); - } - - if (!StringUtils.isJson(mapping)) { - throw new JsonSyntaxException("Mapping is not a valid JSON: " + path); + String mapping = Resources.toString(url, Charsets.UTF_8).trim(); + if (mapping.isEmpty() || !StringUtils.isJson(mapping)) { + throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path); } return mapping; From 1cd16b2a96fa4b8a77101005613dc5a2900daca9 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Mon, 25 Nov 2024 22:19:06 -0800 Subject: [PATCH 21/29] refactoring: null handling + clean code Signed-off-by: Pavan Yekbote --- common/src/main/java/org/opensearch/ml/common/MLIndex.java | 4 ++++ .../java/org/opensearch/ml/common/utils/IndexUtils.java | 6 +++++- .../java/org/opensearch/ml/common/utils/IndexUtilsTest.java | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/common/src/main/java/org/opensearch/ml/common/MLIndex.java b/common/src/main/java/org/opensearch/ml/common/MLIndex.java index f85463abdd..c497452c6b 100644 --- a/common/src/main/java/org/opensearch/ml/common/MLIndex.java +++ b/common/src/main/java/org/opensearch/ml/common/MLIndex.java @@ -54,6 +54,10 @@ public enum MLIndex { } private String getMapping(String mappingPath) { + if (mappingPath == null) { + throw new IllegalArgumentException("Mapping path cannot be null"); + } + try { return IndexUtils.getMappingFromFile(mappingPath); } catch (IOException e) { diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 643f85968e..92ccb07bf9 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -69,6 +69,10 @@ public static Integer getVersionFromMapping(String mapping) { throw new JsonParseException("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping); } - return metaObject.get("schema_version").getAsInt(); + try { + return metaObject.get("schema_version").getAsInt(); + } catch (NumberFormatException | ClassCastException e) { + throw new JsonParseException("Invalid \"schema_version\" value in mapping: " + mapping, e); + } } } diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index f1776b670a..56839a00ad 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -85,8 +85,8 @@ public void testGetMappingFromFileFileNotFound() { @Test public void testGetMappingFromFilesMalformedJson() { String path = "index-mappings/test-mapping-malformed.json"; - JsonSyntaxException e = assertThrows(JsonSyntaxException.class, () -> IndexUtils.getMappingFromFile(path)); - assertEquals("Mapping is not a valid JSON: " + path, e.getMessage()); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Invalid or non-JSON mapping at: " + path, e.getMessage()); } @Test From 369475d11683c1f2ab841e717ec7de7d802207eb Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Mon, 25 Nov 2024 22:20:13 -0800 Subject: [PATCH 22/29] spotless apply Signed-off-by: Pavan Yekbote --- .../test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index 56839a00ad..a4b3badacf 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -14,7 +14,6 @@ import org.junit.Test; import com.google.gson.JsonParseException; -import com.google.gson.JsonSyntaxException; public class IndexUtilsTest { From d8390ca511e4b426335be92b4b0b93b906627e47 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 26 Nov 2024 13:25:53 -0800 Subject: [PATCH 23/29] tests: adding more UT Signed-off-by: Pavan Yekbote --- common/src/test/java/MLIndexTest.java | 20 +++++++++++++++++++ .../ml/common/utils/IndexUtilsTest.java | 17 +++++++++++++--- .../index-mappings/test-mapping.json | 2 +- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/common/src/test/java/MLIndexTest.java b/common/src/test/java/MLIndexTest.java index c0fa050cd7..f88f34a28c 100644 --- a/common/src/test/java/MLIndexTest.java +++ b/common/src/test/java/MLIndexTest.java @@ -1,5 +1,12 @@ +import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING_PATH; +import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING_PATH; +import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING_PATH; + +import java.io.IOException; + import org.junit.Test; import org.opensearch.ml.common.MLIndex; +import org.opensearch.ml.common.utils.IndexUtils; public class MLIndexTest { @@ -14,4 +21,17 @@ public void testValidateMappingsForSystemIndices() { String mapping = index.getMapping(); } } + + // adding an explicit check for critical indices + @Test + public void testMappings() throws IOException { + String model_mapping = IndexUtils.getMappingFromFile(ML_MODEL_INDEX_MAPPING_PATH); + IndexUtils.validateMapping(model_mapping); + + String connector_mapping = IndexUtils.getMappingFromFile(ML_CONNECTOR_INDEX_MAPPING_PATH); + IndexUtils.validateMapping(connector_mapping); + + String config_mapping = IndexUtils.getMappingFromFile(ML_CONFIG_INDEX_MAPPING_PATH); + IndexUtils.validateMapping(config_mapping); + } } diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index a4b3badacf..f9334db729 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -12,6 +12,7 @@ import java.util.Map; import org.junit.Test; +import org.opensearch.OpenSearchParseException; import com.google.gson.JsonParseException; @@ -51,7 +52,7 @@ public void testUpdatedAllNodesReplicaIndexSettingsContainsExpectedValues() { public void testGetMappingFromFile() { String expectedMapping = "{\n" + " \"_meta\": {\n" - + " \"schema_version\": \"1\"\n" + + " \"schema_version\": 1\n" + " },\n" + " \"properties\": {\n" + " \"test_field_1\": {\n" @@ -78,14 +79,12 @@ public void testGetMappingFromFile() { public void testGetMappingFromFileFileNotFound() { String path = "index-mappings/test-mapping-not-found.json"; IOException e = assertThrows(IOException.class, () -> IndexUtils.getMappingFromFile(path)); - assertEquals("Resource not found: " + path, e.getMessage()); } @Test public void testGetMappingFromFilesMalformedJson() { String path = "index-mappings/test-mapping-malformed.json"; IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> IndexUtils.getMappingFromFile(path)); - assertEquals("Invalid or non-JSON mapping at: " + path, e.getMessage()); } @Test @@ -152,4 +151,16 @@ public void testGetVersionFromMappingNoSchemaVersion() { JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); assertEquals("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping, e.getMessage()); } + + @Test + public void testValidateMapping() { + String empty_mapping = ""; + assertThrows(IllegalArgumentException.class, () -> IndexUtils.validateMapping(empty_mapping)); + + String non_json_mapping = "not a json"; + assertThrows(IllegalArgumentException.class, () -> IndexUtils.validateMapping(non_json_mapping)); + + String illegal_schema_mapping = "{\"key1\": \"foo\"}"; + assertThrows(OpenSearchParseException.class, () -> IndexUtils.validateMapping(illegal_schema_mapping)); + } } diff --git a/common/src/test/resources/index-mappings/test-mapping.json b/common/src/test/resources/index-mappings/test-mapping.json index 6114de4687..9f96c6a0a7 100644 --- a/common/src/test/resources/index-mappings/test-mapping.json +++ b/common/src/test/resources/index-mappings/test-mapping.json @@ -1,6 +1,6 @@ { "_meta": { - "schema_version": "1" + "schema_version": 1 }, "properties": { "test_field_1": { From cba2c88cf66fff6b495ef3ed874211e0a81c5d62 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 26 Nov 2024 14:57:56 -0800 Subject: [PATCH 24/29] fix: dependencies to handle jarhell Signed-off-by: Pavan Yekbote --- common/build.gradle | 6 +++--- memory/build.gradle | 3 +++ ml-algorithms/build.gradle | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/common/build.gradle b/common/build.gradle index 3b40a1e3ec..81673663ef 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -36,9 +36,9 @@ dependencies { exclude group: 'com.google.guava', module: 'listenablefuture' } compileOnly 'com.jayway.jsonpath:json-path:2.9.0' - implementation("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}") - implementation("com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}") - implementation group: 'com.networknt' , name: 'json-schema-validator', version: '1.4.0' + compileOnly("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}") + compileOnly("com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}") + compileOnly group: 'com.networknt' , name: 'json-schema-validator', version: '1.4.0' } lombok { diff --git a/memory/build.gradle b/memory/build.gradle index 8251303158..0453912db1 100644 --- a/memory/build.gradle +++ b/memory/build.gradle @@ -38,6 +38,9 @@ dependencies { testImplementation "org.opensearch.client:opensearch-rest-client:${opensearch_version}" testImplementation group: 'com.google.code.gson', name: 'gson', version: '2.10.1' testImplementation group: 'org.json', name: 'json', version: '20231013' + testImplementation("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}") + testImplementation("com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}") + testImplementation group: 'com.networknt' , name: 'json-schema-validator', version: '1.4.0' } test { diff --git a/ml-algorithms/build.gradle b/ml-algorithms/build.gradle index 2559341fa1..d55bd07d9d 100644 --- a/ml-algorithms/build.gradle +++ b/ml-algorithms/build.gradle @@ -75,6 +75,9 @@ dependencies { implementation 'com.jayway.jsonpath:json-path:2.9.0' implementation group: 'org.json', name: 'json', version: '20231013' implementation group: 'software.amazon.awssdk', name: 'netty-nio-client', version: '2.25.40' + testImplementation("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}") + testImplementation("com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}") + testImplementation group: 'com.networknt' , name: 'json-schema-validator', version: '1.4.0' } lombok { From 2884b50203877a3f9fa76576429cb16bf64284b7 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 26 Nov 2024 17:17:15 -0800 Subject: [PATCH 25/29] spotless apply Signed-off-by: Pavan Yekbote --- .../main/java/org/opensearch/ml/common/utils/IndexUtils.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 8dbfe881cc..061fb9ce0e 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -4,7 +4,9 @@ */ package org.opensearch.ml.common.utils; + import static org.opensearch.ml.common.utils.StringUtils.validateSchema; + import java.io.IOException; import java.net.URL; import java.util.Map; From 3e45f6e25ff0c5c20016e551ea0e5d3246d9f133 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 3 Dec 2024 19:11:28 -0800 Subject: [PATCH 26/29] refactor: add header and use single instance of mapper Signed-off-by: Pavan Yekbote --- .../java/org/opensearch/ml/common/utils/StringUtils.java | 7 ++++--- .../java/{ => org/opensearch/ml/common}/MLIndexTest.java | 8 +++++++- 2 files changed, 11 insertions(+), 4 deletions(-) rename common/src/test/java/{ => org/opensearch/ml/common}/MLIndexTest.java (92%) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java index 13fdac9b46..ce377e482b 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java @@ -63,6 +63,8 @@ public class StringUtils { } public static final String TO_STRING_FUNCTION_NAME = ".toString()"; + private static final ObjectMapper MAPPER = new ObjectMapper(); + public static boolean isValidJsonString(String json) { if (json == null || json.isBlank()) { return false; @@ -346,13 +348,12 @@ public static JsonObject getJsonObjectFromString(String jsonString) { } public static void validateSchema(String schemaString, String instanceString) throws IOException { - ObjectMapper mapper = new ObjectMapper(); // parse the schema JSON as string - JsonNode schemaNode = mapper.readTree(schemaString); + JsonNode schemaNode = MAPPER.readTree(schemaString); JsonSchema schema = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012).getSchema(schemaNode); // JSON data to validate - JsonNode jsonNode = mapper.readTree(instanceString); + JsonNode jsonNode = MAPPER.readTree(instanceString); // Validate JSON node against the schema Set errors = schema.validate(jsonNode); diff --git a/common/src/test/java/MLIndexTest.java b/common/src/test/java/org/opensearch/ml/common/MLIndexTest.java similarity index 92% rename from common/src/test/java/MLIndexTest.java rename to common/src/test/java/org/opensearch/ml/common/MLIndexTest.java index f88f34a28c..74d9d44d14 100644 --- a/common/src/test/java/MLIndexTest.java +++ b/common/src/test/java/org/opensearch/ml/common/MLIndexTest.java @@ -1,3 +1,10 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.ml.common; + import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING_PATH; @@ -5,7 +12,6 @@ import java.io.IOException; import org.junit.Test; -import org.opensearch.ml.common.MLIndex; import org.opensearch.ml.common.utils.IndexUtils; public class MLIndexTest { From 399b8924948da7bec799e15f93aa1dd91b914dd7 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Fri, 13 Dec 2024 17:06:44 +0100 Subject: [PATCH 27/29] fixed: doc syntax Signed-off-by: Pavan Yekbote --- .../main/java/org/opensearch/ml/common/utils/IndexUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 061fb9ce0e..bec1771f68 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -86,7 +86,7 @@ public static String replacePlaceholders(String mapping) throws IOException { return mapping; } - /* + /** - Checks if mapping is a valid json - Validates mapping against a schema found in mappings/schema.json - Schema validates the following: From 59998c2b650869f612b8aefe4517e32a022fb5b2 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Fri, 27 Dec 2024 11:36:21 -0800 Subject: [PATCH 28/29] refactor: renamed files, efficient loading of resources, better exception handling Signed-off-by: Pavan Yekbote --- .../org/opensearch/ml/common/CommonValue.java | 18 +++---- .../ml/common/utils/IndexUtils.java | 47 ++++++++++++------- .../ml/common/utils/StringUtils.java | 45 +++++++++--------- .../{ml-agent.json => ml_agent.json} | 0 .../{ml-config.json => ml_config.json} | 0 .../{ml-connector.json => ml_connector.json} | 0 ...{ml-controller.json => ml_controller.json} | 0 ...ry-message.json => ml_memory_message.json} | 0 ...l-memory-meta.json => ml_memory_meta.json} | 0 .../{ml-model.json => ml_model.json} | 0 ...l-model-group.json => ml_model_group.json} | 0 .../{ml-task.json => ml_task.json} | 0 .../ml/common/utils/IndexUtilsTest.java | 8 ++-- .../{test-mapping.json => test_mapping.json} | 0 ...ormed.json => test_mapping_malformed.json} | 0 15 files changed, 67 insertions(+), 51 deletions(-) rename common/src/main/resources/index-mappings/{ml-agent.json => ml_agent.json} (100%) rename common/src/main/resources/index-mappings/{ml-config.json => ml_config.json} (100%) rename common/src/main/resources/index-mappings/{ml-connector.json => ml_connector.json} (100%) rename common/src/main/resources/index-mappings/{ml-controller.json => ml_controller.json} (100%) rename common/src/main/resources/index-mappings/{ml-memory-message.json => ml_memory_message.json} (100%) rename common/src/main/resources/index-mappings/{ml-memory-meta.json => ml_memory_meta.json} (100%) rename common/src/main/resources/index-mappings/{ml-model.json => ml_model.json} (100%) rename common/src/main/resources/index-mappings/{ml-model-group.json => ml_model_group.json} (100%) rename common/src/main/resources/index-mappings/{ml-task.json => ml_task.json} (100%) rename common/src/test/resources/index-mappings/{test-mapping.json => test_mapping.json} (100%) rename common/src/test/resources/index-mappings/{test-mapping-malformed.json => test_mapping_malformed.json} (100%) diff --git a/common/src/main/java/org/opensearch/ml/common/CommonValue.java b/common/src/main/java/org/opensearch/ml/common/CommonValue.java index e06e552536..60d4dbad0e 100644 --- a/common/src/main/java/org/opensearch/ml/common/CommonValue.java +++ b/common/src/main/java/org/opensearch/ml/common/CommonValue.java @@ -44,15 +44,15 @@ public class CommonValue { public static final Set stopWordsIndices = ImmutableSet.of(".plugins-ml-stop-words"); // Index mapping paths - public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml-model-group.json"; - public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml-model.json"; - public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml-task.json"; - public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml-connector.json"; - public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml-config.json"; - public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml-controller.json"; - public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml-agent.json"; - public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml-memory-meta.json"; - public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml-memory-message.json"; + public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml_model_group.json"; + public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml_model.json"; + public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml_task.json"; + public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml_connector.json"; + public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml_config.json"; + public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml_controller.json"; + public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml_agent.json"; + public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml_memory_meta.json"; + public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml_memory_message.json"; // Calculate Versions independently of OpenSearch core version public static final Version VERSION_2_11_0 = Version.fromString("2.11.0"); diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index bec1771f68..05aa9e791a 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.net.URL; +import java.util.HashMap; import java.util.Map; import com.google.common.base.Charsets; @@ -73,34 +74,46 @@ public static String replacePlaceholders(String mapping) throws IOException { throw new IllegalArgumentException("Mapping cannot be null or empty"); } + // Preload resources into memory to avoid redundant I/O + Map loadedPlaceholders = new HashMap<>(); for (Map.Entry placeholder : MAPPING_PLACEHOLDERS.entrySet()) { URL url = IndexUtils.class.getClassLoader().getResource(placeholder.getValue()); if (url == null) { throw new IOException("Resource not found: " + placeholder.getValue()); } - String placeholderMapping = Resources.toString(url, Charsets.UTF_8); - mapping = mapping.replace(placeholder.getKey(), placeholderMapping); + loadedPlaceholders.put(placeholder.getKey(), Resources.toString(url, Charsets.UTF_8)); } - return mapping; + StringBuilder result = new StringBuilder(mapping); + for (Map.Entry entry : loadedPlaceholders.entrySet()) { + String placeholder = entry.getKey(); + String replacement = entry.getValue(); + + // Replace all occurrences of the placeholder + int index; + while ((index = result.indexOf(placeholder)) != -1) { + result.replace(index, index + placeholder.length(), replacement); + } + } + + return result.toString(); } /** - - Checks if mapping is a valid json - - Validates mapping against a schema found in mappings/schema.json - - Schema validates the following: - - Below fields are present: - - "_meta" - - "_meta.schema_version" - - "properties" - - No additional fields at root level - - No additional fields in "_meta" object - - "properties" is an object type - - "_meta" is an object type - - "_meta_.schema_version" provided type is integer - - Note: Validation can be made more strict if a specific schema is defined for each index. + * Checks if mapping is a valid json + * Validates mapping against a schema found in mappings/schema.json + * Schema validates the following: + * Below fields are present: + * "_meta" + * "_meta.schema_version" + * "properties" + * No additional fields at root level + * No additional fields in "_meta" object + * "properties" is an object type + * "_meta" is an object type + * "_meta_.schema_version" provided type is integer + * Note: Validation can be made more strict if a specific schema is defined for each index. */ public static void validateMapping(String mapping) throws IOException { if (mapping.isBlank() || !StringUtils.isJson(mapping)) { diff --git a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java index ce377e482b..8879306773 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java @@ -5,14 +5,12 @@ package org.opensearch.ml.common.utils; -import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -20,6 +18,7 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.apache.commons.lang3.BooleanUtils; import org.json.JSONArray; @@ -27,6 +26,7 @@ import org.json.JSONObject; import org.opensearch.OpenSearchParseException; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; @@ -347,25 +347,28 @@ public static JsonObject getJsonObjectFromString(String jsonString) { return JsonParser.parseString(jsonString).getAsJsonObject(); } - public static void validateSchema(String schemaString, String instanceString) throws IOException { - // parse the schema JSON as string - JsonNode schemaNode = MAPPER.readTree(schemaString); - JsonSchema schema = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012).getSchema(schemaNode); - - // JSON data to validate - JsonNode jsonNode = MAPPER.readTree(instanceString); - - // Validate JSON node against the schema - Set errors = schema.validate(jsonNode); - if (!errors.isEmpty()) { - throw new OpenSearchParseException( - "Validation failed: " - + Arrays.toString(errors.toArray(new ValidationMessage[0])) - + " for instance: " - + instanceString - + " with schema: " - + schemaString - ); + public static void validateSchema(String schemaString, String instanceString) { + try { + // parse the schema JSON as string + JsonNode schemaNode = MAPPER.readTree(schemaString); + JsonSchema schema = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012).getSchema(schemaNode); + + // JSON data to validate + JsonNode jsonNode = MAPPER.readTree(instanceString); + + // Validate JSON node against the schema + Set errors = schema.validate(jsonNode); + if (!errors.isEmpty()) { + String errorMessage = errors.stream().map(ValidationMessage::getMessage).collect(Collectors.joining(", ")); + + throw new OpenSearchParseException( + "Validation failed: " + errorMessage + " for instance: " + instanceString + " with schema: " + schemaString + ); + } + } catch (JsonProcessingException e) { + throw new IllegalArgumentException("Invalid JSON format: " + e.getMessage(), e); + } catch (Exception e) { + throw new OpenSearchParseException("Schema validation failed: " + e.getMessage(), e); } } } diff --git a/common/src/main/resources/index-mappings/ml-agent.json b/common/src/main/resources/index-mappings/ml_agent.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-agent.json rename to common/src/main/resources/index-mappings/ml_agent.json diff --git a/common/src/main/resources/index-mappings/ml-config.json b/common/src/main/resources/index-mappings/ml_config.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-config.json rename to common/src/main/resources/index-mappings/ml_config.json diff --git a/common/src/main/resources/index-mappings/ml-connector.json b/common/src/main/resources/index-mappings/ml_connector.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-connector.json rename to common/src/main/resources/index-mappings/ml_connector.json diff --git a/common/src/main/resources/index-mappings/ml-controller.json b/common/src/main/resources/index-mappings/ml_controller.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-controller.json rename to common/src/main/resources/index-mappings/ml_controller.json diff --git a/common/src/main/resources/index-mappings/ml-memory-message.json b/common/src/main/resources/index-mappings/ml_memory_message.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-memory-message.json rename to common/src/main/resources/index-mappings/ml_memory_message.json diff --git a/common/src/main/resources/index-mappings/ml-memory-meta.json b/common/src/main/resources/index-mappings/ml_memory_meta.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-memory-meta.json rename to common/src/main/resources/index-mappings/ml_memory_meta.json diff --git a/common/src/main/resources/index-mappings/ml-model.json b/common/src/main/resources/index-mappings/ml_model.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-model.json rename to common/src/main/resources/index-mappings/ml_model.json diff --git a/common/src/main/resources/index-mappings/ml-model-group.json b/common/src/main/resources/index-mappings/ml_model_group.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-model-group.json rename to common/src/main/resources/index-mappings/ml_model_group.json diff --git a/common/src/main/resources/index-mappings/ml-task.json b/common/src/main/resources/index-mappings/ml_task.json similarity index 100% rename from common/src/main/resources/index-mappings/ml-task.json rename to common/src/main/resources/index-mappings/ml_task.json diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index f9334db729..d55ac37fc1 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -67,23 +67,23 @@ public void testGetMappingFromFile() { + " }\n" + "}\n"; try { - String actualMapping = IndexUtils.getMappingFromFile("index-mappings/test-mapping.json"); + String actualMapping = IndexUtils.getMappingFromFile("index-mappings/test_mapping.json"); // comparing JsonObjects to avoid issues caused by eol character in different OS assertEquals(StringUtils.getJsonObjectFromString(expectedMapping), StringUtils.getJsonObjectFromString(actualMapping)); } catch (IOException e) { - throw new RuntimeException("Failed to read file at path: index-mappings/test-mapping.json"); + throw new RuntimeException("Failed to read file at path: index-mappings/test_mapping.json"); } } @Test public void testGetMappingFromFileFileNotFound() { - String path = "index-mappings/test-mapping-not-found.json"; + String path = "index-mappings/test_mapping_not_found.json"; IOException e = assertThrows(IOException.class, () -> IndexUtils.getMappingFromFile(path)); } @Test public void testGetMappingFromFilesMalformedJson() { - String path = "index-mappings/test-mapping-malformed.json"; + String path = "index-mappings/test_mapping_malformed.json"; IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> IndexUtils.getMappingFromFile(path)); } diff --git a/common/src/test/resources/index-mappings/test-mapping.json b/common/src/test/resources/index-mappings/test_mapping.json similarity index 100% rename from common/src/test/resources/index-mappings/test-mapping.json rename to common/src/test/resources/index-mappings/test_mapping.json diff --git a/common/src/test/resources/index-mappings/test-mapping-malformed.json b/common/src/test/resources/index-mappings/test_mapping_malformed.json similarity index 100% rename from common/src/test/resources/index-mappings/test-mapping-malformed.json rename to common/src/test/resources/index-mappings/test_mapping_malformed.json From 4747861aac8db7ccc00a62f9877c292e22cc16aa Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Fri, 27 Dec 2024 15:17:00 -0800 Subject: [PATCH 29/29] refactor: cleaner comment Signed-off-by: Pavan Yekbote --- .../ml/common/utils/IndexUtils.java | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 05aa9e791a..dbb27940d0 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -101,19 +101,26 @@ public static String replacePlaceholders(String mapping) throws IOException { } /** - * Checks if mapping is a valid json - * Validates mapping against a schema found in mappings/schema.json - * Schema validates the following: - * Below fields are present: - * "_meta" - * "_meta.schema_version" - * "properties" - * No additional fields at root level - * No additional fields in "_meta" object - * "properties" is an object type - * "_meta" is an object type - * "_meta_.schema_version" provided type is integer - * Note: Validation can be made more strict if a specific schema is defined for each index. + * Checks if the provided mapping is a valid JSON and validates it against a schema. + * + *

The schema is located at mappings/schema.json and enforces the following validations:

+ * + *
    + *
  • Mandatory fields: + *
      + *
    • _meta
    • + *
    • _meta.schema_version
    • + *
    • properties
    • + *
    + *
  • + *
  • No additional fields are allowed at the root level.
  • + *
  • No additional fields are allowed in the _meta object.
  • + *
  • properties must be an object type.
  • + *
  • _meta must be an object type.
  • + *
  • _meta.schema_version must be an integer.
  • + *
+ * + *

Note: Validation can be made stricter if a specific schema is defined for each index.

*/ public static void validateMapping(String mapping) throws IOException { if (mapping.isBlank() || !StringUtils.isJson(mapping)) {