From d7562b1b71d0a515f20e64d67e67d5cdfe221422 Mon Sep 17 00:00:00 2001 From: Subhobrata Dey Date: Tue, 16 Apr 2024 20:13:41 -0700 Subject: [PATCH] doc-level monitor fan-out approach Signed-off-by: Subhobrata Dey --- .../org/opensearch/alerting/AlertingPlugin.kt | 22 +- .../alerting/BucketLevelMonitorRunner.kt | 4 +- .../alerting/DocumentLevelMonitorRunner.kt | 1133 +++++----------- .../org/opensearch/alerting/MonitorRunner.kt | 4 +- .../alerting/MonitorRunnerExecutionContext.kt | 1 + .../alerting/MonitorRunnerService.kt | 97 +- .../alerting/QueryLevelMonitorRunner.kt | 4 +- .../action/DocLevelMonitorFanOutAction.kt | 15 + .../action/DocLevelMonitorFanOutRequest.kt | 101 ++ .../action/DocLevelMonitorFanOutResponse.kt | 92 ++ .../alerting/action/ExecuteMonitorRequest.kt | 9 +- .../alerting/action/ExecuteWorkflowRequest.kt | 7 +- .../model/DocumentLevelTriggerRunResult.kt | 34 +- .../alerting/model/IndexExecutionContext.kt | 58 +- .../alerting/settings/AlertingSettings.kt | 9 + .../TransportDocLevelMonitorFanOutAction.kt | 1183 +++++++++++++++++ .../TransportExecuteMonitorAction.kt | 12 +- .../TransportExecuteWorkflowAction.kt | 18 +- .../alerting/util/AlertingException.kt | 18 +- .../workflow/CompositeWorkflowRunner.kt | 23 +- .../alerting/workflow/WorkflowRunContext.kt | 44 +- .../alerting/workflow/WorkflowRunner.kt | 4 +- .../alerting/AlertingRestTestCase.kt | 43 +- .../alerting/DocumentMonitorRunnerIT.kt | 107 +- .../alerting/MonitorDataSourcesIT.kt | 25 +- .../org/opensearch/alerting/TestHelpers.kt | 6 + .../DocLevelMonitorFanOutRequestTests.kt | 90 ++ .../DocLevelMonitorFanOutResponseTests.kt | 56 + .../action/GetFindingsRequestTests.kt | 9 +- .../alerting/model/WriteableTests.kt | 36 +- .../resthandler/SecureWorkflowRestApiIT.kt | 4 +- .../TriggerExpressionResolverTests.kt | 18 +- alerting/src/test/resources/sample.pem | 16 +- alerting/src/test/resources/test-kirk.jks | Bin 4504 -> 3766 bytes 34 files changed, 2388 insertions(+), 914 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutAction.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequest.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponse.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDocLevelMonitorFanOutAction.kt create mode 100644 alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequestTests.kt create mode 100644 alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponseTests.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index f908f4ebe..0a6e0bea3 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -6,6 +6,7 @@ package org.opensearch.alerting import org.opensearch.action.ActionRequest +import org.opensearch.alerting.action.DocLevelMonitorFanOutAction import org.opensearch.alerting.action.ExecuteMonitorAction import org.opensearch.alerting.action.ExecuteWorkflowAction import org.opensearch.alerting.action.GetDestinationsAction @@ -55,6 +56,7 @@ import org.opensearch.alerting.transport.TransportAcknowledgeAlertAction import org.opensearch.alerting.transport.TransportAcknowledgeChainedAlertAction import org.opensearch.alerting.transport.TransportDeleteMonitorAction import org.opensearch.alerting.transport.TransportDeleteWorkflowAction +import org.opensearch.alerting.transport.TransportDocLevelMonitorFanOutAction import org.opensearch.alerting.transport.TransportExecuteMonitorAction import org.opensearch.alerting.transport.TransportExecuteWorkflowAction import org.opensearch.alerting.transport.TransportGetAlertsAction @@ -229,6 +231,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R ActionPlugin.ActionHandler(AlertingActions.DELETE_WORKFLOW_ACTION_TYPE, TransportDeleteWorkflowAction::class.java), ActionPlugin.ActionHandler(ExecuteWorkflowAction.INSTANCE, TransportExecuteWorkflowAction::class.java), ActionPlugin.ActionHandler(GetRemoteIndexesAction.INSTANCE, TransportGetRemoteIndexesAction::class.java), + ActionPlugin.ActionHandler(DocLevelMonitorFanOutAction.INSTANCE, TransportDocLevelMonitorFanOutAction::class.java) ) } @@ -263,6 +266,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R val settings = environment.settings() val lockService = LockService(client, clusterService) alertIndices = AlertIndices(settings, client, threadPool, clusterService) + val alertService = AlertService(client, xContentRegistry, alertIndices) + val triggerService = TriggerService(scriptService) runner = MonitorRunnerService .registerClusterService(clusterService) .registerClient(client) @@ -273,8 +278,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R .registerThreadPool(threadPool) .registerAlertIndices(alertIndices) .registerInputService(InputService(client, scriptService, namedWriteableRegistry, xContentRegistry, clusterService, settings)) - .registerTriggerService(TriggerService(scriptService)) - .registerAlertService(AlertService(client, xContentRegistry, alertIndices)) + .registerTriggerService(triggerService) + .registerAlertService(alertService) .registerDocLevelMonitorQueries(DocLevelMonitorQueries(client, clusterService)) .registerJvmStats(JvmStats.jvmStats()) .registerWorkflowService(WorkflowService(client, xContentRegistry)) @@ -305,7 +310,17 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R DeleteMonitorService.initialize(client, lockService) - return listOf(sweeper, scheduler, runner, scheduledJobIndices, docLevelMonitorQueries, destinationMigrationCoordinator, lockService) + return listOf( + sweeper, + scheduler, + runner, + scheduledJobIndices, + docLevelMonitorQueries, + destinationMigrationCoordinator, + lockService, + alertService, + triggerService + ) } override fun getSettings(): List> { @@ -336,6 +351,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R AlertingSettings.ALERT_HISTORY_RETENTION_PERIOD, AlertingSettings.ALERTING_MAX_MONITORS, AlertingSettings.PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT, + AlertingSettings.DOC_LEVEL_MONITOR_FAN_OUT_NODES, DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE, AlertingSettings.PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY, AlertingSettings.REQUEST_TIMEOUT, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt index 0813a6d2f..7161ed764 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt @@ -55,6 +55,7 @@ import org.opensearch.search.aggregations.AggregatorFactories import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.transport.TransportService import java.time.Instant import java.util.UUID @@ -68,7 +69,8 @@ object BucketLevelMonitorRunner : MonitorRunner() { periodEnd: Instant, dryrun: Boolean, workflowRunContext: WorkflowRunContext?, - executionId: String + executionId: String, + transportService: TransportService ): MonitorRunResult { val roles = MonitorRunnerService.getRolesForMonitor(monitor) logger.debug("Running monitor: ${monitor.name} with roles: $roles Thread: ${Thread.currentThread().name}") diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt index 51a32b642..4005a8cc0 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt @@ -7,93 +7,51 @@ package org.opensearch.alerting import org.apache.logging.log4j.LogManager import org.opensearch.ExceptionsHelper -import org.opensearch.OpenSearchStatusException -import org.opensearch.action.DocWriteRequest -import org.opensearch.action.admin.indices.refresh.RefreshAction -import org.opensearch.action.admin.indices.refresh.RefreshRequest -import org.opensearch.action.bulk.BulkRequest -import org.opensearch.action.bulk.BulkResponse -import org.opensearch.action.get.MultiGetItemResponse -import org.opensearch.action.get.MultiGetRequest -import org.opensearch.action.index.IndexRequest -import org.opensearch.action.search.SearchAction -import org.opensearch.action.search.SearchRequest -import org.opensearch.action.search.SearchResponse -import org.opensearch.alerting.model.AlertContext +import org.opensearch.Version +import org.opensearch.action.ActionListenerResponseHandler +import org.opensearch.action.support.GroupedActionListener +import org.opensearch.alerting.action.DocLevelMonitorFanOutAction +import org.opensearch.alerting.action.DocLevelMonitorFanOutRequest +import org.opensearch.alerting.action.DocLevelMonitorFanOutResponse +import org.opensearch.alerting.model.ActionRunResult import org.opensearch.alerting.model.DocumentLevelTriggerRunResult import org.opensearch.alerting.model.IndexExecutionContext import org.opensearch.alerting.model.InputRunResults -import org.opensearch.alerting.model.MonitorMetadata import org.opensearch.alerting.model.MonitorRunResult -import org.opensearch.alerting.model.userErrorMessage -import org.opensearch.alerting.opensearchapi.convertToMap -import org.opensearch.alerting.opensearchapi.suspendUntil -import org.opensearch.alerting.script.DocumentLevelTriggerExecutionContext import org.opensearch.alerting.util.AlertingException import org.opensearch.alerting.util.IndexUtils -import org.opensearch.alerting.util.defaultToPerExecutionAction -import org.opensearch.alerting.util.getActionExecutionPolicy -import org.opensearch.alerting.util.getCancelAfterTimeInterval -import org.opensearch.alerting.util.parseSampleDocTags -import org.opensearch.alerting.util.printsSampleDocData import org.opensearch.alerting.workflow.WorkflowRunContext -import org.opensearch.client.node.NodeClient import org.opensearch.cluster.metadata.IndexMetadata -import org.opensearch.cluster.routing.Preference +import org.opensearch.cluster.node.DiscoveryNode import org.opensearch.cluster.routing.ShardRouting import org.opensearch.cluster.service.ClusterService -import org.opensearch.common.unit.TimeValue -import org.opensearch.common.xcontent.XContentFactory -import org.opensearch.common.xcontent.XContentType -import org.opensearch.commons.alerting.AlertingPluginInterface -import org.opensearch.commons.alerting.action.PublishFindingsRequest -import org.opensearch.commons.alerting.action.SubscribeFindingsResponse -import org.opensearch.commons.alerting.model.ActionExecutionResult -import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.DocLevelQuery -import org.opensearch.commons.alerting.model.DocumentLevelTrigger -import org.opensearch.commons.alerting.model.Finding import org.opensearch.commons.alerting.model.Monitor -import org.opensearch.commons.alerting.model.action.PerAlertActionScope -import org.opensearch.commons.alerting.util.string import org.opensearch.core.action.ActionListener -import org.opensearch.core.common.bytes.BytesReference +import org.opensearch.core.common.breaker.CircuitBreakingException +import org.opensearch.core.common.io.stream.Writeable +import org.opensearch.core.index.shard.ShardId import org.opensearch.core.rest.RestStatus -import org.opensearch.core.xcontent.ToXContent -import org.opensearch.core.xcontent.XContentBuilder import org.opensearch.index.IndexNotFoundException -import org.opensearch.index.query.BoolQueryBuilder -import org.opensearch.index.query.Operator -import org.opensearch.index.query.QueryBuilders import org.opensearch.index.seqno.SequenceNumbers -import org.opensearch.indices.IndexClosedException -import org.opensearch.percolator.PercolateQueryBuilderExt -import org.opensearch.search.SearchHit -import org.opensearch.search.SearchHits -import org.opensearch.search.builder.SearchSourceBuilder -import org.opensearch.search.fetch.subphase.FetchSourceContext -import org.opensearch.search.sort.SortOrder +import org.opensearch.node.NodeClosedException +import org.opensearch.transport.ActionNotFoundTransportException +import org.opensearch.transport.ConnectTransportException +import org.opensearch.transport.RemoteTransportException +import org.opensearch.transport.TransportException +import org.opensearch.transport.TransportRequestOptions +import org.opensearch.transport.TransportService import java.io.IOException import java.time.Instant -import java.util.UUID -import java.util.stream.Collectors +import kotlin.coroutines.resume +import kotlin.coroutines.resumeWithException +import kotlin.coroutines.suspendCoroutine import kotlin.math.max class DocumentLevelMonitorRunner : MonitorRunner() { private val logger = LogManager.getLogger(javaClass) - var nonPercolateSearchesTimeTakenStat = 0L - var percolateQueriesTimeTakenStat = 0L - var totalDocsQueriedStat = 0L - var docTransformTimeTakenStat = 0L - var totalDocsSizeInBytesStat = 0L - var docsSizeOfBatchInBytes = 0L - /* Contains list of docs source that are held in memory to submit to percolate query against query index. - * Docs are fetched from the source index per shard and transformed.*/ - val transformedDocs = mutableListOf>() - - // Maps a finding ID to the related document. - private val findingIdToDocSource = mutableMapOf() + private var totalTimeTakenStat = 0L override suspend fun runMonitor( monitor: Monitor, @@ -102,9 +60,11 @@ class DocumentLevelMonitorRunner : MonitorRunner() { periodEnd: Instant, dryrun: Boolean, workflowRunContext: WorkflowRunContext?, - executionId: String + executionId: String, + transportService: TransportService ): MonitorRunResult { logger.debug("Document-level-monitor is running ...") + val startTime = System.currentTimeMillis() val isTempMonitor = dryrun || monitor.id == Monitor.NO_ID var monitorResult = MonitorRunResult(monitor.name, periodStart, periodEnd) monitorCtx.findingsToTriggeredQueries = mutableMapOf() @@ -141,10 +101,6 @@ class DocumentLevelMonitorRunner : MonitorRunner() { val updatedLastRunContext = lastRunContext.toMutableMap() - val queryToDocIds = mutableMapOf>() - val inputRunResults = mutableMapOf>() - val docsToQueries = mutableMapOf>() - try { // Resolve all passed indices to concrete indices val allConcreteIndices = IndexUtils.resolveAllIndices( @@ -178,6 +134,7 @@ class DocumentLevelMonitorRunner : MonitorRunner() { val concreteIndicesSeenSoFar = mutableListOf() val updatedIndexNames = mutableListOf() + val docLevelMonitorFanOutResponses: MutableList = mutableListOf() docLevelMonitorInput.indices.forEach { indexName -> var concreteIndices = IndexUtils.resolveAllIndices( listOf(indexName), @@ -228,7 +185,11 @@ class DocumentLevelMonitorRunner : MonitorRunner() { if (IndexUtils.isAlias(indexName, monitorCtx.clusterService!!.state()) || IndexUtils.isDataStream(indexName, monitorCtx.clusterService!!.state()) ) { - if (concreteIndexName == IndexUtils.getWriteIndex(indexName, monitorCtx.clusterService!!.state())) { + if (concreteIndexName == IndexUtils.getWriteIndex( + indexName, + monitorCtx.clusterService!!.state() + ) + ) { updatedLastRunContext.remove(lastWriteIndex) updatedLastRunContext[concreteIndexName] = indexUpdatedRunContext } @@ -243,30 +204,9 @@ class DocumentLevelMonitorRunner : MonitorRunner() { // update lastRunContext if its a temp monitor as we only want to view the last bit of data then // TODO: If dryrun, we should make it so we limit the search as this could still potentially give us lots of data if (isTempMonitor) { - indexLastRunContext[shard] = max(-1, (indexUpdatedRunContext[shard] as String).toInt() - 10) - } - } - - val fieldsToBeQueried = mutableSetOf() - if (monitorCtx.fetchOnlyQueryFieldNames) { - for (it in queries) { - if (it.queryFieldNames.isEmpty()) { - fieldsToBeQueried.clear() - logger.debug( - "Monitor ${monitor.id} : " + - "Doc Level query ${it.id} : ${it.query} doesn't have queryFieldNames populated. " + - "Cannot optimize monitor to fetch only query-relevant fields. " + - "Querying entire doc source." - ) - break - } - fieldsToBeQueried.addAll(it.queryFieldNames) + indexLastRunContext[shard] = + max(-1, (indexUpdatedRunContext[shard] as String).toInt() - 10) } - if (fieldsToBeQueried.isNotEmpty()) - logger.debug( - "Monitor ${monitor.id} Querying only fields " + - "${fieldsToBeQueried.joinToString()} instead of entire _source of documents" - ) } val indexExecutionContext = IndexExecutionContext( queries, @@ -274,90 +214,150 @@ class DocumentLevelMonitorRunner : MonitorRunner() { indexUpdatedRunContext, updatedIndexName, concreteIndexName, + updatedIndexNames, + concreteIndices, conflictingFields.toList(), matchingDocIdsPerIndex?.get(concreteIndexName), ) - fetchShardDataAndMaybeExecutePercolateQueries( - monitor, + val shards = mutableSetOf() + shards.addAll(indexUpdatedRunContext.keys) + shards.remove("index") + shards.remove("shards_count") + + val nodeMap = getNodes(monitorCtx) + val nodeShardAssignments = distributeShards( monitorCtx, - indexExecutionContext, - monitorMetadata, - inputRunResults, - docsToQueries, - updatedIndexNames, - concreteIndicesSeenSoFar, - ArrayList(fieldsToBeQueried) - ) { shard, maxSeqNo -> // function passed to update last run context with new max sequence number - indexExecutionContext.updatedLastRunContext[shard] = maxSeqNo - } - } - } - /* if all indices are covered still in-memory docs size limit is not breached we would need to submit - the percolate query at the end */ - if (transformedDocs.isNotEmpty()) { - performPercolateQueryAndResetCounters( - monitorCtx, - monitor, - monitorMetadata, - updatedIndexNames, - concreteIndicesSeenSoFar, - inputRunResults, - docsToQueries, - ) - } - monitorResult = monitorResult.copy(inputResults = InputRunResults(listOf(inputRunResults))) + nodeMap.keys.toList(), + shards.toList(), + concreteIndexName + ) + + val responses: Collection = suspendCoroutine { cont -> + val listener = GroupedActionListener( + object : ActionListener> { + override fun onResponse(response: Collection) { + cont.resume(response) + } + + override fun onFailure(e: Exception) { + if (e.cause is Exception) + cont.resumeWithException(e.cause as Exception) + else + cont.resumeWithException(e) + } + }, + nodeShardAssignments.size + ) + val responseReader = Writeable.Reader { + DocLevelMonitorFanOutResponse(it) + } + for (node in nodeMap) { + if (nodeShardAssignments.containsKey(node.key)) { + val docLevelMonitorFanOutRequest = DocLevelMonitorFanOutRequest( + monitor, + dryrun, + monitorMetadata, + executionId, + indexExecutionContext, + nodeShardAssignments[node.key]!!.toList(), + concreteIndicesSeenSoFar, + workflowRunContext + ) - /* - populate the map queryToDocIds with pairs of - this fixes the issue of passing id, name, tags fields of DocLevelQuery object correctly to TriggerExpressionParser - */ - queries.forEach { - if (inputRunResults.containsKey(it.id)) { - queryToDocIds[it] = inputRunResults[it.id]!! + transportService.sendRequest( + node.value, + DocLevelMonitorFanOutAction.NAME, + docLevelMonitorFanOutRequest, + TransportRequestOptions.EMPTY, + object : ActionListenerResponseHandler( + listener, + responseReader + ) { + override fun handleException(e: TransportException) { + val cause = e.unwrapCause() + if (cause is ConnectTransportException || + ( + e is RemoteTransportException && + ( + cause is NodeClosedException || + cause is CircuitBreakingException || + cause is ActionNotFoundTransportException + ) + ) + ) { + val localNode = monitorCtx.clusterService!!.localNode() + // retry in local node + transportService.sendRequest( + localNode, + DocLevelMonitorFanOutAction.NAME, + docLevelMonitorFanOutRequest, + TransportRequestOptions.EMPTY, + object : + ActionListenerResponseHandler( + listener, + responseReader + ) { + override fun handleException(e: TransportException) { + logger.error("Fan out retry failed in node ${localNode.id}", e) + listener.onResponse( + DocLevelMonitorFanOutResponse( + "", + "", + "", + mutableMapOf(), + exception = if (e.cause is AlertingException) { + e.cause as AlertingException + } else { + AlertingException.wrap(e) as AlertingException + } + ) + ) + } + + override fun handleResponse(response: DocLevelMonitorFanOutResponse) { + listener.onResponse(response) + } + } + ) + } else { + logger.error("Fan out failed in node ${node.key}", e) + listener.onResponse( + DocLevelMonitorFanOutResponse( + "", + "", + "", + mutableMapOf(), + exception = if (e.cause is AlertingException) { + e.cause as AlertingException + } else { + AlertingException.wrap(e) as AlertingException + } + ) + ) + } + } + + override fun handleResponse(response: DocLevelMonitorFanOutResponse) { + listener.onResponse(response) + } + } + ) + } + } + } + docLevelMonitorFanOutResponses.addAll(responses) } } - val idQueryMap: Map = queries.associateBy { it.id } - - val triggerResults = mutableMapOf() - // If there are no triggers defined, we still want to generate findings - if (monitor.triggers.isEmpty()) { - if (dryrun == false && monitor.id != Monitor.NO_ID) { - createFindings(monitor, monitorCtx, docsToQueries, idQueryMap, true) - } - } else { - monitor.triggers.forEach { - triggerResults[it.id] = runForEachDocTrigger( - monitorCtx, - monitorResult, - it as DocumentLevelTrigger, - monitor, - idQueryMap, - docsToQueries, - queryToDocIds, - dryrun, - executionId = executionId, - workflowRunContext = workflowRunContext - ) - } + val isFanOutSuccessful = checkAndThrowExceptionIfAllFanOutsFailed(docLevelMonitorFanOutResponses) + if (isFanOutSuccessful != null) { + throw isFanOutSuccessful } - // Don't update monitor if this is a test monitor + updateLastRunContextFromFanOutResponses(docLevelMonitorFanOutResponses, updatedLastRunContext) + val triggerResults = buildTriggerResults(docLevelMonitorFanOutResponses) + val inputRunResults = buildInputRunResults(docLevelMonitorFanOutResponses) if (!isTempMonitor) { - // If any error happened during trigger execution, upsert monitor error alert - val errorMessage = constructErrorMessageFromTriggerResults(triggerResults = triggerResults) - if (errorMessage.isNotEmpty()) { - monitorCtx.alertService!!.upsertMonitorErrorAlert( - monitor = monitor, - errorMessage = errorMessage, - executionId = executionId, - workflowRunContext - ) - } else { - onSuccessfulMonitorRun(monitorCtx, monitor) - } - MonitorMetadataService.upsertMetadata( monitorMetadata.copy(lastRunContext = updatedLastRunContext), true @@ -366,9 +366,8 @@ class DocumentLevelMonitorRunner : MonitorRunner() { // Clean up any queries created by the dry run monitor monitorCtx.docLevelMonitorQueries!!.deleteDocLevelQueriesOnDryRun(monitorMetadata) } - // TODO: Update the Document as part of the Trigger and return back the trigger action result - return monitorResult.copy(triggerResults = triggerResults) + return monitorResult.copy(triggerResults = triggerResults, inputResults = inputRunResults) } catch (e: Exception) { val errorMessage = ExceptionsHelper.detailedMessage(e) monitorCtx.alertService!!.upsertMonitorErrorAlert(monitor, errorMessage, executionId, workflowRunContext) @@ -380,282 +379,134 @@ class DocumentLevelMonitorRunner : MonitorRunner() { ) return monitorResult.copy(error = alertingException, inputResults = InputRunResults(emptyList(), alertingException)) } finally { + val endTime = System.currentTimeMillis() + totalTimeTakenStat = endTime - startTime logger.debug( - "PERF_DEBUG_STATS: Monitor ${monitor.id} " + - "Time spent on fetching data from shards in millis: $nonPercolateSearchesTimeTakenStat" - ) - logger.debug( - "PERF_DEBUG_STATS: Monitor {} Time spent on percolate queries in millis: {}", - monitor.id, - percolateQueriesTimeTakenStat - ) - logger.debug( - "PERF_DEBUG_STATS: Monitor {} Time spent on transforming doc fields in millis: {}", + "Monitor {} Time spent on monitor run: {}", monitor.id, - docTransformTimeTakenStat + totalTimeTakenStat ) - logger.debug("PERF_DEBUG_STATS: Monitor {} Num docs queried: {}", monitor.id, totalDocsQueriedStat) } } - private suspend fun onSuccessfulMonitorRun(monitorCtx: MonitorRunnerExecutionContext, monitor: Monitor) { - monitorCtx.alertService!!.clearMonitorErrorAlert(monitor) - if (monitor.dataSources.alertsHistoryIndex != null) { - monitorCtx.alertService!!.moveClearedErrorAlertsToHistory( - monitor.id, - monitor.dataSources.alertsIndex, - monitor.dataSources.alertsHistoryIndex!! - ) - } - } + private fun updateLastRunContextFromFanOutResponses( + docLevelMonitorFanOutResponses: MutableList, + updatedLastRunContext: MutableMap>, + ) { - private fun constructErrorMessageFromTriggerResults( - triggerResults: MutableMap? = null - ): String { - var errorMessage = "" - if (triggerResults != null) { - val triggersErrorBuilder = StringBuilder() - triggerResults.forEach { - if (it.value.error != null) { - triggersErrorBuilder.append("[${it.key}]: [${it.value.error!!.userErrorMessage()}]").append(" | ") + // Prepare updatedLastRunContext for each index + for (indexName in updatedLastRunContext.keys) { + for (fanOutResponse in docLevelMonitorFanOutResponses) { + if (fanOutResponse.exception == null) { + // fanOutResponse.lastRunContexts //updatedContexts for relevant shards + val indexLastRunContext = updatedLastRunContext[indexName] as MutableMap + + if (fanOutResponse.lastRunContexts.contains("index") && fanOutResponse.lastRunContexts["index"] == indexName) { + fanOutResponse.lastRunContexts.keys.forEach { + + val seq_no = fanOutResponse.lastRunContexts[it].toString().toIntOrNull() + if ( + it != "shards_count" && + it != "index" && + seq_no != null && + seq_no >= 0 + ) { + indexLastRunContext[it] = seq_no + } + } + } } } - if (triggersErrorBuilder.isNotEmpty()) { - errorMessage = "Trigger errors: $triggersErrorBuilder" - } } - return errorMessage } - private suspend fun runForEachDocTrigger( - monitorCtx: MonitorRunnerExecutionContext, - monitorResult: MonitorRunResult, - trigger: DocumentLevelTrigger, - monitor: Monitor, - idQueryMap: Map, - docsToQueries: MutableMap>, - queryToDocIds: Map>, - dryrun: Boolean, - workflowRunContext: WorkflowRunContext?, - executionId: String - ): DocumentLevelTriggerRunResult { - val triggerCtx = DocumentLevelTriggerExecutionContext(monitor, trigger) - val triggerResult = monitorCtx.triggerService!!.runDocLevelTrigger(monitor, trigger, queryToDocIds) - - val triggerFindingDocPairs = mutableListOf>() - - // TODO: Implement throttling for findings - val findingToDocPairs = createFindings( - monitor, - monitorCtx, - docsToQueries, - idQueryMap, - !dryrun && monitor.id != Monitor.NO_ID, - executionId - ) - - findingToDocPairs.forEach { - // Only pick those entries whose docs have triggers associated with them - if (triggerResult.triggeredDocs.contains(it.second)) { - triggerFindingDocPairs.add(Pair(it.first, it.second)) - } - } - - val actionCtx = triggerCtx.copy( - triggeredDocs = triggerResult.triggeredDocs, - relatedFindings = findingToDocPairs.map { it.first }, - error = monitorResult.error ?: triggerResult.error - ) - - if (printsSampleDocData(trigger) && triggerFindingDocPairs.isNotEmpty()) - getDocSources( - findingToDocPairs = findingToDocPairs, - monitorCtx = monitorCtx, - monitor = monitor - ) - - val alerts = mutableListOf() - val alertContexts = mutableListOf() - triggerFindingDocPairs.forEach { - val alert = monitorCtx.alertService!!.composeDocLevelAlert( - listOf(it.first), - listOf(it.second), - triggerCtx, - monitorResult.alertError() ?: triggerResult.alertError(), - executionId = executionId, - workflorwRunContext = workflowRunContext - ) - alerts.add(alert) - - val docSource = findingIdToDocSource[alert.findingIds.first()]?.response?.convertToMap() - - alertContexts.add( - AlertContext( - alert = alert, - associatedQueries = alert.findingIds.flatMap { findingId -> - monitorCtx.findingsToTriggeredQueries?.getOrDefault(findingId, emptyList()) ?: emptyList() - }, - sampleDocs = listOfNotNull(docSource) - ) - ) - } - - val shouldDefaultToPerExecution = defaultToPerExecutionAction( - monitorCtx.maxActionableAlertCount, - monitorId = monitor.id, - triggerId = trigger.id, - totalActionableAlertCount = alerts.size, - monitorOrTriggerError = actionCtx.error - ) - - for (action in trigger.actions) { - val actionExecutionScope = action.getActionExecutionPolicy(monitor)!!.actionExecutionScope - if (actionExecutionScope is PerAlertActionScope && !shouldDefaultToPerExecution) { - for (alertContext in alertContexts) { - val actionResults = this.runAction(action, actionCtx.copy(alerts = listOf(alertContext)), monitorCtx, monitor, dryrun) - triggerResult.actionResultsMap.getOrPut(alertContext.alert.id) { mutableMapOf() } - triggerResult.actionResultsMap[alertContext.alert.id]?.set(action.id, actionResults) - } - } else if (alertContexts.isNotEmpty()) { - val actionResults = this.runAction(action, actionCtx.copy(alerts = alertContexts), monitorCtx, monitor, dryrun) - for (alert in alerts) { - triggerResult.actionResultsMap.getOrPut(alert.id) { mutableMapOf() } - triggerResult.actionResultsMap[alert.id]?.set(action.id, actionResults) - } - } - } - - // Alerts are saved after the actions since if there are failures in the actions, they can be stated in the alert - if (!dryrun && monitor.id != Monitor.NO_ID) { - val updatedAlerts = alerts.map { alert -> - val actionResults = triggerResult.actionResultsMap.getOrDefault(alert.id, emptyMap()) - val actionExecutionResults = actionResults.values.map { actionRunResult -> - ActionExecutionResult(actionRunResult.actionId, actionRunResult.executionTime, if (actionRunResult.throttled) 1 else 0) - } - alert.copy(actionExecutionResults = actionExecutionResults) - } - - monitorCtx.retryPolicy?.let { - monitorCtx.alertService!!.saveAlerts( - monitor.dataSources, - updatedAlerts, - it, - routingId = monitor.id - ) + private fun checkAndThrowExceptionIfAllFanOutsFailed( + docLevelMonitorFanOutResponses: MutableList + ): AlertingException? { + val exceptions = mutableListOf() + for (res in docLevelMonitorFanOutResponses) { + if (res.exception == null) { + return null + } else { + exceptions.add(res.exception) } } - return triggerResult + return AlertingException.merge(*exceptions.toTypedArray()) } - /** - * 1. Bulk index all findings based on shouldCreateFinding flag - * 2. invoke publishFinding() to kickstart auto-correlations - * 3. Returns a list of pairs for finding id to doc id - */ - private suspend fun createFindings( - monitor: Monitor, - monitorCtx: MonitorRunnerExecutionContext, - docsToQueries: MutableMap>, - idQueryMap: Map, - shouldCreateFinding: Boolean, - workflowExecutionId: String? = null, - ): List> { - - val findingDocPairs = mutableListOf>() - val findings = mutableListOf() - val indexRequests = mutableListOf() - val findingsToTriggeredQueries = mutableMapOf>() - - docsToQueries.forEach { - val triggeredQueries = it.value.map { queryId -> idQueryMap[queryId]!! } - - // Before the "|" is the doc id and after the "|" is the index - val docIndex = it.key.split("|") - - val finding = Finding( - id = UUID.randomUUID().toString(), - relatedDocIds = listOf(docIndex[0]), - correlatedDocIds = listOf(docIndex[0]), - monitorId = monitor.id, - monitorName = monitor.name, - index = docIndex[1], - docLevelQueries = triggeredQueries, - timestamp = Instant.now(), - executionId = workflowExecutionId - ) - findingDocPairs.add(Pair(finding.id, it.key)) - findings.add(finding) - findingsToTriggeredQueries[finding.id] = triggeredQueries - - val findingStr = - finding.toXContent(XContentBuilder.builder(XContentType.JSON.xContent()), ToXContent.EMPTY_PARAMS) - .string() - logger.debug("Findings: $findingStr") + private fun buildTriggerResults( + docLevelMonitorFanOutResponses: MutableList, + ): MutableMap { + val triggerResults = mutableMapOf() + val triggerErrorMap = mutableMapOf>() + for (res in docLevelMonitorFanOutResponses) { + if (res.exception == null) { + for (triggerId in res.triggerResults.keys) { + val documentLevelTriggerRunResult = res.triggerResults[triggerId] + if (documentLevelTriggerRunResult != null) { + if (false == triggerResults.contains(triggerId)) { + triggerResults[triggerId] = documentLevelTriggerRunResult + triggerErrorMap[triggerId] = if (documentLevelTriggerRunResult.error != null) { + val error = if (documentLevelTriggerRunResult.error is AlertingException) { + documentLevelTriggerRunResult.error as AlertingException + } else { + AlertingException.wrap(documentLevelTriggerRunResult.error!!) as AlertingException + } + mutableListOf(error) + } else { + mutableListOf() + } + } else { + val currVal = triggerResults[triggerId] + val newTriggeredDocs = mutableListOf() + newTriggeredDocs.addAll(currVal!!.triggeredDocs) + newTriggeredDocs.addAll(documentLevelTriggerRunResult.triggeredDocs) + val newActionResults = mutableMapOf>() + newActionResults.putAll(currVal.actionResultsMap) + newActionResults.putAll(documentLevelTriggerRunResult.actionResultsMap) + triggerResults[triggerId] = currVal.copy( + triggeredDocs = newTriggeredDocs, + actionResultsMap = newActionResults + ) - if (shouldCreateFinding) { - indexRequests += IndexRequest(monitor.dataSources.findingsIndex) - .source(findingStr, XContentType.JSON) - .id(finding.id) - .opType(DocWriteRequest.OpType.CREATE) + if (documentLevelTriggerRunResult.error != null) { + triggerErrorMap[triggerId]!!.add(documentLevelTriggerRunResult.error as AlertingException) + } + } + } + } } } - if (indexRequests.isNotEmpty()) { - bulkIndexFindings(monitor, monitorCtx, indexRequests) - } - - try { - findings.forEach { finding -> - publishFinding(monitor, monitorCtx, finding) + triggerErrorMap.forEach { triggerId, errorList -> + if (errorList.isNotEmpty()) { + triggerResults[triggerId]!!.error = AlertingException.merge(*errorList.toTypedArray()) } - } catch (e: Exception) { - // suppress exception - logger.error("Optional finding callback failed", e) } - - if (monitorCtx.findingsToTriggeredQueries == null) monitorCtx.findingsToTriggeredQueries = findingsToTriggeredQueries - else monitorCtx.findingsToTriggeredQueries = monitorCtx.findingsToTriggeredQueries!! + findingsToTriggeredQueries - - return findingDocPairs + return triggerResults } - private suspend fun bulkIndexFindings( - monitor: Monitor, - monitorCtx: MonitorRunnerExecutionContext, - indexRequests: List - ) { - indexRequests.chunked(monitorCtx.findingsIndexBatchSize).forEach { batch -> - val bulkResponse: BulkResponse = monitorCtx.client!!.suspendUntil { - bulk(BulkRequest().add(batch), it) - } - if (bulkResponse.hasFailures()) { - bulkResponse.items.forEach { item -> - if (item.isFailed) { - logger.error("Failed indexing the finding ${item.id} of monitor [${monitor.id}]") + private fun buildInputRunResults(docLevelMonitorFanOutResponses: MutableList): InputRunResults { + val inputRunResults = mutableMapOf>() + val errors: MutableList = mutableListOf() + for (response in docLevelMonitorFanOutResponses) { + if (response.exception == null) { + if (response.inputResults.error != null) { + if (response.inputResults.error is AlertingException) { + errors.add(response.inputResults.error) + } else { + errors.add(AlertingException.wrap(response.inputResults.error) as AlertingException) + } + } + val partialResult = response.inputResults.results + for (result in partialResult) { + for (id in result.keys) { + inputRunResults.getOrPut(id) { mutableSetOf() }.addAll(result[id] as Collection) } } - } else { - logger.debug("[${bulkResponse.items.size}] All findings successfully indexed.") } } - monitorCtx.client!!.execute(RefreshAction.INSTANCE, RefreshRequest(monitor.dataSources.findingsIndex)) - } - - private fun publishFinding( - monitor: Monitor, - monitorCtx: MonitorRunnerExecutionContext, - finding: Finding - ) { - val publishFindingsRequest = PublishFindingsRequest(monitor.id, finding) - AlertingPluginInterface.publishFinding( - monitorCtx.client!! as NodeClient, - publishFindingsRequest, - object : ActionListener { - override fun onResponse(response: SubscribeFindingsResponse) {} - - override fun onFailure(e: Exception) {} - } - ) + return InputRunResults(listOf(inputRunResults), if (!errors.isEmpty()) AlertingException.merge(*errors.toTypedArray()) else null) } private fun initializeNewLastRunContext( @@ -704,436 +555,44 @@ class DocumentLevelMonitorRunner : MonitorRunner() { return allShards.filter { it.primary() }.size } - /** 1. Fetch data per shard for given index. (only 10000 docs are fetched. - * needs to be converted to scroll if not performant enough) - * 2. Transform documents to conform to format required for percolate query - * 3a. Check if docs in memory are crossing threshold defined by setting. - * 3b. If yes, perform percolate query and update docToQueries Map with all hits from percolate queries */ - private suspend fun fetchShardDataAndMaybeExecutePercolateQueries( - monitor: Monitor, - monitorCtx: MonitorRunnerExecutionContext, - indexExecutionCtx: IndexExecutionContext, - monitorMetadata: MonitorMetadata, - inputRunResults: MutableMap>, - docsToQueries: MutableMap>, - monitorInputIndices: List, - concreteIndices: List, - fieldsToBeQueried: List, - updateLastRunContext: (String, String) -> Unit - ) { - val count: Int = indexExecutionCtx.updatedLastRunContext["shards_count"] as Int - for (i: Int in 0 until count) { - val shard = i.toString() - try { - val prevSeqNo = indexExecutionCtx.lastRunContext[shard].toString().toLongOrNull() - val from = prevSeqNo ?: SequenceNumbers.NO_OPS_PERFORMED - var to: Long = Long.MAX_VALUE - while (to >= from) { - val hits: SearchHits = searchShard( - monitorCtx, - indexExecutionCtx.concreteIndexName, - shard, - from, - to, - indexExecutionCtx.docIds, - fieldsToBeQueried, - ) - if (hits.hits.isEmpty()) { - if (to == Long.MAX_VALUE) { - updateLastRunContext(shard, (prevSeqNo ?: SequenceNumbers.NO_OPS_PERFORMED).toString()) // didn't find any docs - } - break - } - if (to == Long.MAX_VALUE) { // max sequence number of shard needs to be computed - updateLastRunContext(shard, hits.hits[0].seqNo.toString()) - } - val leastSeqNoFromHits = hits.hits.last().seqNo - to = leastSeqNoFromHits - 1 - val startTime = System.currentTimeMillis() - transformedDocs.addAll( - transformSearchHitsAndReconstructDocs( - hits, - indexExecutionCtx.indexName, - indexExecutionCtx.concreteIndexName, - monitor.id, - indexExecutionCtx.conflictingFields, - ) - ) - if ( - transformedDocs.isNotEmpty() && - shouldPerformPercolateQueryAndFlushInMemoryDocs(transformedDocs.size, monitorCtx) - ) { - performPercolateQueryAndResetCounters( - monitorCtx, - monitor, - monitorMetadata, - monitorInputIndices, - concreteIndices, - inputRunResults, - docsToQueries, - ) - } - docTransformTimeTakenStat += System.currentTimeMillis() - startTime - } - } catch (e: Exception) { - logger.error( - "Monitor ${monitor.id} :" + - "Failed to run fetch data from shard [$shard] of index [${indexExecutionCtx.concreteIndexName}]. " + - "Error: ${e.message}", - e - ) - if (e is IndexClosedException) { - throw e - } - } - if ( - transformedDocs.isNotEmpty() && - shouldPerformPercolateQueryAndFlushInMemoryDocs(transformedDocs.size, monitorCtx) - ) { - performPercolateQueryAndResetCounters( - monitorCtx, - monitor, - monitorMetadata, - monitorInputIndices, - concreteIndices, - inputRunResults, - docsToQueries, - ) - } - } - } - - private fun shouldPerformPercolateQueryAndFlushInMemoryDocs( - numDocs: Int, - monitorCtx: MonitorRunnerExecutionContext, - ): Boolean { - return isInMemoryDocsSizeExceedingMemoryLimit(docsSizeOfBatchInBytes, monitorCtx) || - isInMemoryNumDocsExceedingMaxDocsPerPercolateQueryLimit(numDocs, monitorCtx) - } - - private suspend fun performPercolateQueryAndResetCounters( - monitorCtx: MonitorRunnerExecutionContext, - monitor: Monitor, - monitorMetadata: MonitorMetadata, - monitorInputIndices: List, - concreteIndices: List, - inputRunResults: MutableMap>, - docsToQueries: MutableMap>, - ) { - try { - val percolateQueryResponseHits = runPercolateQueryOnTransformedDocs( - monitorCtx, - transformedDocs, - monitor, - monitorMetadata, - concreteIndices, - monitorInputIndices, - ) - - percolateQueryResponseHits.forEach { hit -> - var id = hit.id - concreteIndices.forEach { id = id.replace("_${it}_${monitor.id}", "") } - monitorInputIndices.forEach { id = id.replace("_${it}_${monitor.id}", "") } - val docIndices = hit.field("_percolator_document_slot").values.map { it.toString().toInt() } - docIndices.forEach { idx -> - val docIndex = "${transformedDocs[idx].first}|${transformedDocs[idx].second.concreteIndexName}" - inputRunResults.getOrPut(id) { mutableSetOf() }.add(docIndex) - docsToQueries.getOrPut(docIndex) { mutableListOf() }.add(id) - } - } - totalDocsQueriedStat += transformedDocs.size.toLong() - } finally { - transformedDocs.clear() - docsSizeOfBatchInBytes = 0 - } - } - - /** Executes search query on given shard of given index to fetch docs with sequene number greater than prevSeqNo. - * This method hence fetches only docs from shard which haven't been queried before - */ - private suspend fun searchShard( - monitorCtx: MonitorRunnerExecutionContext, - index: String, - shard: String, - prevSeqNo: Long?, - maxSeqNo: Long, - docIds: List? = null, - fieldsToFetch: List, - ): SearchHits { - if (prevSeqNo?.equals(maxSeqNo) == true && maxSeqNo != 0L) { - return SearchHits.empty() - } - val boolQueryBuilder = BoolQueryBuilder() - boolQueryBuilder.filter(QueryBuilders.rangeQuery("_seq_no").gt(prevSeqNo).lte(maxSeqNo)) - - if (!docIds.isNullOrEmpty()) { - boolQueryBuilder.filter(QueryBuilders.termsQuery("_id", docIds)) - } - - val request: SearchRequest = SearchRequest() - .indices(index) - .preference("_shards:$shard") - .source( - SearchSourceBuilder() - .version(true) - .sort("_seq_no", SortOrder.DESC) - .seqNoAndPrimaryTerm(true) - .query(boolQueryBuilder) - .size(monitorCtx.docLevelMonitorShardFetchSize) - ) - .preference(Preference.PRIMARY_FIRST.type()) - request.cancelAfterTimeInterval = TimeValue.timeValueMinutes( - getCancelAfterTimeInterval() - ) - if (monitorCtx.fetchOnlyQueryFieldNames && fieldsToFetch.isNotEmpty()) { - request.source().fetchSource(false) - for (field in fieldsToFetch) { - request.source().fetchField(field) - } - } - val response: SearchResponse = monitorCtx.client!!.suspendUntil { monitorCtx.client!!.search(request, it) } - if (response.status() !== RestStatus.OK) { - throw IOException("Failed to search shard: [$shard] in index [$index]. Response status is ${response.status()}") - } - nonPercolateSearchesTimeTakenStat += response.took.millis - return response.hits + private fun getNodes(monitorCtx: MonitorRunnerExecutionContext): Map { + return monitorCtx.clusterService!!.state().nodes.dataNodes.filter { it.value.version >= Version.CURRENT } } - /** Executes percolate query on the docs against the monitor's query index and return the hits from the search response*/ - private suspend fun runPercolateQueryOnTransformedDocs( + private fun distributeShards( monitorCtx: MonitorRunnerExecutionContext, - docs: MutableList>, - monitor: Monitor, - monitorMetadata: MonitorMetadata, - concreteIndices: List, - monitorInputIndices: List, - ): SearchHits { - val indices = docs.stream().map { it.second.indexName }.distinct().collect(Collectors.toList()) - val boolQueryBuilder = BoolQueryBuilder().must(buildShouldClausesOverPerIndexMatchQueries(indices)) - val percolateQueryBuilder = - PercolateQueryBuilderExt("query", docs.map { it.second.docSource }, XContentType.JSON) - if (monitor.id.isNotEmpty()) { - boolQueryBuilder.must(QueryBuilders.matchQuery("monitor_id", monitor.id).operator(Operator.AND)) - } - boolQueryBuilder.filter(percolateQueryBuilder) - val queryIndices = - docs.map { monitorMetadata.sourceToQueryIndexMapping[it.second.indexName + monitor.id] }.distinct() - if (queryIndices.isEmpty()) { - val message = - "Monitor ${monitor.id}: Failed to resolve query Indices from source indices during monitor execution!" + - " sourceIndices: $monitorInputIndices" - logger.error(message) - throw AlertingException.wrap( - OpenSearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR) - ) - } - - val searchRequest = - SearchRequest().indices(*queryIndices.toTypedArray()).preference(Preference.PRIMARY_FIRST.type()) - val searchSourceBuilder = SearchSourceBuilder() - searchSourceBuilder.query(boolQueryBuilder) - searchRequest.source(searchSourceBuilder) - logger.debug( - "Monitor ${monitor.id}: " + - "Executing percolate query for docs from source indices " + - "$monitorInputIndices against query index $queryIndices" - ) - var response: SearchResponse - - try { - searchRequest.cancelAfterTimeInterval = TimeValue.timeValueMinutes( - getCancelAfterTimeInterval() - ) - - response = monitorCtx.client!!.suspendUntil { - monitorCtx.client!!.execute(SearchAction.INSTANCE, searchRequest, it) - } - } catch (e: Exception) { - throw IllegalStateException( - "Monitor ${monitor.id}:" + - " Failed to run percolate search for sourceIndex [${concreteIndices.joinToString()}] " + - "and queryIndex [${queryIndices.joinToString()}] for ${docs.size} document(s)", - e - ) - } - - if (response.status() !== RestStatus.OK) { - throw IOException( - "Monitor ${monitor.id}: Failed to search percolate index: ${queryIndices.joinToString()}. " + - "Response status is ${response.status()}" - ) - } - logger.debug("Monitor ${monitor.id} PERF_DEBUG: Percolate query time taken millis = ${response.took}") - percolateQueriesTimeTakenStat += response.took.millis - return response.hits - } - /** we cannot use terms query because `index` field's mapping is of type TEXT and not keyword. Refer doc-level-queries.json*/ - private fun buildShouldClausesOverPerIndexMatchQueries(indices: List): BoolQueryBuilder { - val boolQueryBuilder = QueryBuilders.boolQuery() - indices.forEach { boolQueryBuilder.should(QueryBuilders.matchQuery("index", it)) } - return boolQueryBuilder - } - - /** Transform field names and index names in all the search hits to format required to run percolate search against them. - * Hits are transformed using method transformDocumentFieldNames() */ - private fun transformSearchHitsAndReconstructDocs( - hits: SearchHits, + allNodes: List, + shards: List, index: String, - concreteIndex: String, - monitorId: String, - conflictingFields: List, - ): List> { - return hits.mapNotNull(fun(hit: SearchHit): Pair? { - try { - val sourceMap = if (hit.hasSource()) { - hit.sourceAsMap - } else { - constructSourceMapFromFieldsInHit(hit) - } - transformDocumentFieldNames( - sourceMap, - conflictingFields, - "_${index}_$monitorId", - "_${concreteIndex}_$monitorId", - "" - ) - var xContentBuilder = XContentFactory.jsonBuilder().map(sourceMap) - val sourceRef = BytesReference.bytes(xContentBuilder) - docsSizeOfBatchInBytes += sourceRef.ramBytesUsed() - totalDocsSizeInBytesStat += sourceRef.ramBytesUsed() - return Pair(hit.id, TransformedDocDto(index, concreteIndex, hit.id, sourceRef)) - } catch (e: Exception) { - logger.error("Monitor $monitorId: Failed to transform payload $hit for percolate query", e) - // skip any document which we fail to transform because we anyway won't be able to run percolate queries on them. - return null - } - }) - } - - private fun constructSourceMapFromFieldsInHit(hit: SearchHit): MutableMap { - if (hit.fields == null) - return mutableMapOf() - val sourceMap: MutableMap = mutableMapOf() - for (field in hit.fields) { - if (field.value.values != null && field.value.values.isNotEmpty()) - if (field.value.values.size == 1) { - sourceMap[field.key] = field.value.values[0] - } else sourceMap[field.key] = field.value.values + ): Map> { + val totalShards = shards.size + val numFanOutNodes = allNodes.size.coerceAtMost((totalShards + 1) / 2) + val totalNodes = monitorCtx.totalNodesFanOut.coerceAtMost(numFanOutNodes) + val shardsPerNode = totalShards / totalNodes + var shardsRemaining = totalShards % totalNodes + + val shardIdList = shards.map { + ShardId(monitorCtx.clusterService!!.state().metadata.index(index).index, it.toInt()) } - return sourceMap - } - - /** - * Traverses document fields in leaves recursively and appends [fieldNameSuffixIndex] to field names with same names - * but different mappings & [fieldNameSuffixPattern] to field names which have unique names. - * - * Example for index name is my_log_index and Monitor ID is TReewWdsf2gdJFV: - * { { - * "a": { "a": { - * "b": 1234 ----> "b_my_log_index_TReewWdsf2gdJFV": 1234 - * } } - * } - * - * @param jsonAsMap Input JSON (as Map) - * @param fieldNameSuffix Field suffix which is appended to existing field name - */ - private fun transformDocumentFieldNames( - jsonAsMap: MutableMap, - conflictingFields: List, - fieldNameSuffixPattern: String, - fieldNameSuffixIndex: String, - fieldNamePrefix: String - ) { - val tempMap = mutableMapOf() - val it: MutableIterator> = jsonAsMap.entries.iterator() - while (it.hasNext()) { - val entry = it.next() - if (entry.value is Map<*, *>) { - transformDocumentFieldNames( - entry.value as MutableMap, - conflictingFields, - fieldNameSuffixPattern, - fieldNameSuffixIndex, - if (fieldNamePrefix == "") entry.key else "$fieldNamePrefix.${entry.key}" - ) - } else if (!entry.key.endsWith(fieldNameSuffixPattern) && !entry.key.endsWith(fieldNameSuffixIndex)) { - var alreadyReplaced = false - conflictingFields.forEach { conflictingField -> - if (conflictingField == "$fieldNamePrefix.${entry.key}" || (fieldNamePrefix == "" && conflictingField == entry.key)) { - tempMap["${entry.key}$fieldNameSuffixIndex"] = entry.value - it.remove() - alreadyReplaced = true - } - } - if (!alreadyReplaced) { - tempMap["${entry.key}$fieldNameSuffixPattern"] = entry.value - it.remove() - } + val nodes = allNodes.subList(0, totalNodes) + + val nodeShardAssignments = mutableMapOf>() + var idx = 0 + for (node in nodes) { + val nodeShardAssignment = mutableSetOf() + for (i in 1..shardsPerNode) { + nodeShardAssignment.add(shardIdList[idx++]) } + nodeShardAssignments[node] = nodeShardAssignment } - jsonAsMap.putAll(tempMap) - } - - /** - * Returns true, if the docs fetched from shards thus far amount to less than threshold - * amount of percentage (default:10. setting is dynamic and configurable) of the total heap size or not. - * - */ - private fun isInMemoryDocsSizeExceedingMemoryLimit(docsBytesSize: Long, monitorCtx: MonitorRunnerExecutionContext): Boolean { - var thresholdPercentage = monitorCtx.percQueryDocsSizeMemoryPercentageLimit - val heapMaxBytes = monitorCtx.jvmStats!!.mem.heapMax.bytes - val thresholdBytes = (thresholdPercentage.toDouble() / 100.0) * heapMaxBytes - - return docsBytesSize > thresholdBytes - } - - private fun isInMemoryNumDocsExceedingMaxDocsPerPercolateQueryLimit(numDocs: Int, monitorCtx: MonitorRunnerExecutionContext): Boolean { - var maxNumDocsThreshold = monitorCtx.percQueryMaxNumDocsInMemory - return numDocs >= maxNumDocsThreshold - } - - /** - * Performs an mGet request to retrieve the documents associated with findings. - * - * When possible, this will only retrieve the document fields that are specifically - * referenced for printing in the mustache template. - */ - private suspend fun getDocSources( - findingToDocPairs: List>, - monitorCtx: MonitorRunnerExecutionContext, - monitor: Monitor - ) { - val docFieldTags = parseSampleDocTags(monitor.triggers) - val request = MultiGetRequest() - // Perform mGet request in batches. - findingToDocPairs.chunked(monitorCtx.findingsIndexBatchSize).forEach { batch -> - batch.forEach { (findingId, docIdAndIndex) -> - val docIdAndIndexSplit = docIdAndIndex.split("|") - val docId = docIdAndIndexSplit[0] - val concreteIndex = docIdAndIndexSplit[1] - if (findingId.isNotEmpty() && docId.isNotEmpty() && concreteIndex.isNotEmpty()) { - val docItem = MultiGetRequest.Item(concreteIndex, docId) - if (docFieldTags.isNotEmpty()) - docItem.fetchSourceContext(FetchSourceContext(true, docFieldTags.toTypedArray(), emptyArray())) - request.add(docItem) - } - val response = monitorCtx.client!!.suspendUntil { monitorCtx.client!!.multiGet(request, it) } - response.responses.forEach { item -> - findingIdToDocSource[findingId] = item - } + for (node in nodes) { + if (shardsRemaining == 0) { + break } + nodeShardAssignments[node]!!.add(shardIdList[idx++]) + --shardsRemaining } + return nodeShardAssignments } - - /** - * POJO holding information about each doc's concrete index, id, input index pattern/alias/datastream name - * and doc source. A list of these POJOs would be passed to percolate query execution logic. - */ - data class TransformedDocDto( - var indexName: String, - var concreteIndexName: String, - var docId: String, - var docSource: BytesReference - ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt index 3340cac43..69adc7ef7 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt @@ -31,6 +31,7 @@ import org.opensearch.commons.alerting.model.Table import org.opensearch.commons.alerting.model.action.Action import org.opensearch.commons.notifications.model.NotificationConfigInfo import org.opensearch.core.common.Strings +import org.opensearch.transport.TransportService import java.time.Instant abstract class MonitorRunner { @@ -42,7 +43,8 @@ abstract class MonitorRunner { periodEnd: Instant, dryRun: Boolean, workflowRunContext: WorkflowRunContext? = null, - executionId: String + executionId: String, + transportService: TransportService ): MonitorRunResult<*> suspend fun runAction( diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt index eefad1b6b..459743f9d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt @@ -61,5 +61,6 @@ data class MonitorRunnerExecutionContext( AlertingSettings.DEFAULT_PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT, @Volatile var docLevelMonitorShardFetchSize: Int = AlertingSettings.DEFAULT_DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE, + @Volatile var totalNodesFanOut: Int = AlertingSettings.DEFAULT_FAN_OUT_NODES, @Volatile var lockService: LockService? = null ) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index 4ff0bee73..3ddff1b03 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -14,6 +14,12 @@ import org.apache.logging.log4j.LogManager import org.opensearch.action.bulk.BackoffPolicy import org.opensearch.action.search.TransportSearchAction.SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING import org.opensearch.action.support.master.AcknowledgedResponse +import org.opensearch.alerting.action.ExecuteMonitorAction +import org.opensearch.alerting.action.ExecuteMonitorRequest +import org.opensearch.alerting.action.ExecuteMonitorResponse +import org.opensearch.alerting.action.ExecuteWorkflowAction +import org.opensearch.alerting.action.ExecuteWorkflowRequest +import org.opensearch.alerting.action.ExecuteWorkflowResponse import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.alerts.AlertMover.Companion.moveAlerts import org.opensearch.alerting.core.JobRunner @@ -50,6 +56,7 @@ import org.opensearch.cluster.metadata.IndexNameExpressionResolver import org.opensearch.cluster.service.ClusterService import org.opensearch.common.lifecycle.AbstractLifecycleComponent import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.TimeValue import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.ScheduledJob @@ -63,6 +70,7 @@ import org.opensearch.script.Script import org.opensearch.script.ScriptService import org.opensearch.script.TemplateScript import org.opensearch.threadpool.ThreadPool +import org.opensearch.transport.TransportService import java.time.Instant import java.time.LocalDateTime import java.time.ZoneOffset @@ -220,6 +228,11 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon monitorCtx.docLevelMonitorShardFetchSize = it } + monitorCtx.totalNodesFanOut = AlertingSettings.DOC_LEVEL_MONITOR_FAN_OUT_NODES.get(monitorCtx.settings) + monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer(AlertingSettings.DOC_LEVEL_MONITOR_FAN_OUT_NODES) { + monitorCtx.totalNodesFanOut = it + } + return this } @@ -313,11 +326,20 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon monitorCtx.lockService!!.acquireLock(job, it) } ?: return@launch logger.debug("lock ${lock!!.lockId} acquired") - logger.debug( - "PERF_DEBUG: executing workflow ${job.id} on node " + - monitorCtx.clusterService!!.state().nodes().localNode.id - ) - runJob(job, periodStart, periodEnd, false) + + monitorCtx.client!!.suspendUntil { + monitorCtx.client!!.execute( + ExecuteWorkflowAction.INSTANCE, + ExecuteWorkflowRequest( + false, + TimeValue(periodEnd.toEpochMilli()), + job.id, + job, + TimeValue(periodStart.toEpochMilli()) + ), + it + ) + } } finally { monitorCtx.client!!.suspendUntil { monitorCtx.lockService!!.release(lock, it) } logger.debug("lock ${lock!!.lockId} released") @@ -336,7 +358,20 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon "PERF_DEBUG: executing ${job.monitorType} ${job.id} on node " + monitorCtx.clusterService!!.state().nodes().localNode.id ) - runJob(job, periodStart, periodEnd, false) + val executeMonitorRequest = ExecuteMonitorRequest( + false, + TimeValue(periodEnd.toEpochMilli()), + job.id, + job, + TimeValue(periodStart.toEpochMilli()) + ) + monitorCtx.client!!.suspendUntil { + monitorCtx.client!!.execute( + ExecuteMonitorAction.INSTANCE, + executeMonitorRequest, + it + ) + } } finally { monitorCtx.client!!.suspendUntil { monitorCtx.lockService!!.release(lock, it) } logger.debug("lock ${lock!!.lockId} released") @@ -349,11 +384,23 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon } } - suspend fun runJob(workflow: Workflow, periodStart: Instant, periodEnd: Instant, dryrun: Boolean): WorkflowRunResult { - return CompositeWorkflowRunner.runWorkflow(workflow, monitorCtx, periodStart, periodEnd, dryrun) + suspend fun runJob( + workflow: Workflow, + periodStart: Instant, + periodEnd: Instant, + dryrun: Boolean, + transportService: TransportService + ): WorkflowRunResult { + return CompositeWorkflowRunner.runWorkflow(workflow, monitorCtx, periodStart, periodEnd, dryrun, transportService) } - suspend fun runJob(job: ScheduledJob, periodStart: Instant, periodEnd: Instant, dryrun: Boolean): MonitorRunResult<*> { + suspend fun runJob( + job: ScheduledJob, + periodStart: Instant, + periodEnd: Instant, + dryrun: Boolean, + transportService: TransportService + ): MonitorRunResult<*> { // Updating the scheduled job index at the start of monitor execution runs for when there is an upgrade the the schema mapping // has not been updated. if (!IndexUtils.scheduledJobIndexUpdated && monitorCtx.clusterService != null && monitorCtx.client != null) { @@ -373,7 +420,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon if (job is Workflow) { logger.info("Executing scheduled workflow - id: ${job.id}, periodStart: $periodStart, periodEnd: $periodEnd, dryrun: $dryrun") - CompositeWorkflowRunner.runWorkflow(workflow = job, monitorCtx, periodStart, periodEnd, dryrun) + CompositeWorkflowRunner.runWorkflow(workflow = job, monitorCtx, periodStart, periodEnd, dryrun, transportService) } val monitor = job as Monitor val executionId = "${monitor.id}_${LocalDateTime.now(ZoneOffset.UTC)}_${UUID.randomUUID()}" @@ -382,11 +429,35 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon "periodEnd: $periodEnd, dryrun: $dryrun, executionId: $executionId" ) val runResult = if (monitor.isBucketLevelMonitor()) { - BucketLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) + BucketLevelMonitorRunner.runMonitor( + monitor, + monitorCtx, + periodStart, + periodEnd, + dryrun, + executionId = executionId, + transportService = transportService + ) } else if (monitor.isDocLevelMonitor()) { - DocumentLevelMonitorRunner().runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) + DocumentLevelMonitorRunner().runMonitor( + monitor, + monitorCtx, + periodStart, + periodEnd, + dryrun, + executionId = executionId, + transportService = transportService + ) } else { - QueryLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) + QueryLevelMonitorRunner.runMonitor( + monitor, + monitorCtx, + periodStart, + periodEnd, + dryrun, + executionId = executionId, + transportService = transportService + ) } return runResult } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt index 3d9ab5a78..b975af728 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt @@ -17,6 +17,7 @@ import org.opensearch.alerting.workflow.WorkflowRunContext import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.QueryLevelTrigger +import org.opensearch.transport.TransportService import java.time.Instant object QueryLevelMonitorRunner : MonitorRunner() { @@ -29,7 +30,8 @@ object QueryLevelMonitorRunner : MonitorRunner() { periodEnd: Instant, dryrun: Boolean, workflowRunContext: WorkflowRunContext?, - executionId: String + executionId: String, + transportService: TransportService ): MonitorRunResult { val roles = MonitorRunnerService.getRolesForMonitor(monitor) logger.debug("Running monitor: ${monitor.name} with roles: $roles Thread: ${Thread.currentThread().name}") diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutAction.kt new file mode 100644 index 000000000..c03d95942 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutAction.kt @@ -0,0 +1,15 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.action.ActionType + +class DocLevelMonitorFanOutAction private constructor() : ActionType(NAME, ::DocLevelMonitorFanOutResponse) { + companion object { + val INSTANCE = DocLevelMonitorFanOutAction() + const val NAME = "cluster:admin/opensearch/alerting/monitor/doclevel/fanout" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequest.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequest.kt new file mode 100644 index 000000000..7b16b8961 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequest.kt @@ -0,0 +1,101 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.alerting.model.IndexExecutionContext +import org.opensearch.alerting.model.MonitorMetadata +import org.opensearch.alerting.workflow.WorkflowRunContext +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.index.shard.ShardId +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.ToXContentObject +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException + +class DocLevelMonitorFanOutRequest : ActionRequest, ToXContentObject { + val monitor: Monitor + val dryRun: Boolean + val monitorMetadata: MonitorMetadata + val executionId: String + val indexExecutionContext: IndexExecutionContext + val shardIds: List + val concreteIndicesSeenSoFar: List + val workflowRunContext: WorkflowRunContext? + + constructor( + monitor: Monitor, + dryRun: Boolean, + monitorMetadata: MonitorMetadata, + executionId: String, + indexExecutionContext: IndexExecutionContext, + shardIds: List, + concreteIndicesSeenSoFar: List, + workflowRunContext: WorkflowRunContext?, + ) : super() { + this.monitor = monitor + this.dryRun = dryRun + this.monitorMetadata = monitorMetadata + this.executionId = executionId + this.indexExecutionContext = indexExecutionContext + this.shardIds = shardIds + this.concreteIndicesSeenSoFar = concreteIndicesSeenSoFar + this.workflowRunContext = workflowRunContext + require(false == shardIds.isEmpty()) { } + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + monitor = Monitor.readFrom(sin)!!, + dryRun = sin.readBoolean(), + monitorMetadata = MonitorMetadata.readFrom(sin), + executionId = sin.readString(), + shardIds = sin.readList(::ShardId), + concreteIndicesSeenSoFar = sin.readStringList(), + workflowRunContext = if (sin.readBoolean()) { + WorkflowRunContext(sin) + } else null, + indexExecutionContext = IndexExecutionContext(sin) + ) + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + monitor.writeTo(out) + out.writeBoolean(dryRun) + monitorMetadata.writeTo(out) + out.writeString(executionId) + out.writeCollection(shardIds) + out.writeStringCollection(concreteIndicesSeenSoFar) + out.writeBoolean(workflowRunContext != null) + workflowRunContext?.writeTo(out) + indexExecutionContext.writeTo(out) + } + + override fun validate(): ActionRequestValidationException? { + var actionValidationException: ActionRequestValidationException? = null + if (shardIds.isEmpty()) { + actionValidationException = ActionRequestValidationException() + actionValidationException.addValidationError("shard_ids is null or empty") + } + return actionValidationException + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + .field("monitor", monitor) + .field("dry_run", dryRun) + .field("execution_id", executionId) + .field("index_execution_context", indexExecutionContext) + .field("shard_ids", shardIds) + .field("concrete_indices", concreteIndicesSeenSoFar) + .field("workflow_run_context", workflowRunContext) + return builder.endObject() + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponse.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponse.kt new file mode 100644 index 000000000..8c74de356 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponse.kt @@ -0,0 +1,92 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.alerting.model.DocumentLevelTriggerRunResult +import org.opensearch.alerting.model.InputRunResults +import org.opensearch.alerting.util.AlertingException +import org.opensearch.core.action.ActionResponse +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.ToXContentObject +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException + +class DocLevelMonitorFanOutResponse : ActionResponse, ToXContentObject { + val nodeId: String + val executionId: String + val monitorId: String + val lastRunContexts: MutableMap + val inputResults: InputRunResults + val triggerResults: Map + val exception: AlertingException? + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + nodeId = sin.readString(), + executionId = sin.readString(), + monitorId = sin.readString(), + lastRunContexts = sin.readMap()!! as MutableMap, + inputResults = InputRunResults.readFrom(sin), + triggerResults = suppressWarning(sin.readMap(StreamInput::readString, DocumentLevelTriggerRunResult::readFrom)), + exception = sin.readException() + ) + + constructor( + nodeId: String, + executionId: String, + monitorId: String, + lastRunContexts: MutableMap, + inputResults: InputRunResults = InputRunResults(), // partial, + triggerResults: Map = mapOf(), + exception: AlertingException? = null + ) : super() { + this.nodeId = nodeId + this.executionId = executionId + this.monitorId = monitorId + this.lastRunContexts = lastRunContexts + this.inputResults = inputResults + this.triggerResults = triggerResults + this.exception = exception + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeString(nodeId) + out.writeString(executionId) + out.writeString(monitorId) + out.writeMap(lastRunContexts) + inputResults.writeTo(out) + out.writeMap( + triggerResults, + StreamOutput::writeString, + { stream, stats -> stats.writeTo(stream) } + ) + out.writeException(exception) + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + .field("node_id", nodeId) + .field("execution_id", executionId) + .field("monitor_id", monitorId) + .field("last_run_contexts", lastRunContexts) + .field("input_results", inputResults) + .field("trigger_results", triggerResults) + .field("exception", exception) + .endObject() + return builder + } + + companion object { + @Suppress("UNCHECKED_CAST") + fun suppressWarning(map: MutableMap?): Map { + return map as Map + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorRequest.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorRequest.kt index ecc504677..c7b699dfc 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorRequest.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorRequest.kt @@ -18,17 +18,20 @@ class ExecuteMonitorRequest : ActionRequest { val requestEnd: TimeValue val monitorId: String? val monitor: Monitor? + val requestStart: TimeValue? constructor( dryrun: Boolean, requestEnd: TimeValue, monitorId: String?, - monitor: Monitor? + monitor: Monitor?, + requestStart: TimeValue? = null ) : super() { this.dryrun = dryrun this.requestEnd = requestEnd this.monitorId = monitorId this.monitor = monitor + this.requestStart = requestStart } @Throws(IOException::class) @@ -38,7 +41,8 @@ class ExecuteMonitorRequest : ActionRequest { sin.readOptionalString(), // monitorId if (sin.readBoolean()) { Monitor.readFrom(sin) // monitor - } else null + } else null, + sin.readOptionalTimeValue() ) override fun validate(): ActionRequestValidationException? { @@ -56,5 +60,6 @@ class ExecuteMonitorRequest : ActionRequest { } else { out.writeBoolean(false) } + out.writeOptionalTimeValue(requestStart) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteWorkflowRequest.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteWorkflowRequest.kt index 3b3d48ed2..104448cce 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteWorkflowRequest.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteWorkflowRequest.kt @@ -22,15 +22,18 @@ class ExecuteWorkflowRequest : ActionRequest { val requestEnd: TimeValue val workflowId: String? val workflow: Workflow? + val requestStart: TimeValue? constructor( dryrun: Boolean, requestEnd: TimeValue, workflowId: String?, workflow: Workflow?, + requestStart: TimeValue? = null, ) : super() { this.dryrun = dryrun this.requestEnd = requestEnd + this.requestStart = requestStart this.workflowId = workflowId this.workflow = workflow } @@ -42,7 +45,8 @@ class ExecuteWorkflowRequest : ActionRequest { sin.readOptionalString(), if (sin.readBoolean()) { Workflow.readFrom(sin) - } else null + } else null, + sin.readOptionalTimeValue() ) override fun validate(): ActionRequestValidationException? { @@ -59,6 +63,7 @@ class ExecuteWorkflowRequest : ActionRequest { override fun writeTo(out: StreamOutput) { out.writeBoolean(dryrun) out.writeTimeValue(requestEnd) + out.writeOptionalTimeValue(requestStart) out.writeOptionalString(workflowId) if (workflow != null) { out.writeBoolean(true) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/model/DocumentLevelTriggerRunResult.kt b/alerting/src/main/kotlin/org/opensearch/alerting/model/DocumentLevelTriggerRunResult.kt index 9d98aab42..a89d89da7 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/model/DocumentLevelTriggerRunResult.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/model/DocumentLevelTriggerRunResult.kt @@ -25,7 +25,7 @@ data class DocumentLevelTriggerRunResult( triggerName = sin.readString(), error = sin.readException(), triggeredDocs = sin.readStringList(), - actionResultsMap = sin.readMap() as MutableMap> + actionResultsMap = readActionResults(sin) ) override fun internalXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { @@ -39,7 +39,15 @@ data class DocumentLevelTriggerRunResult( override fun writeTo(out: StreamOutput) { super.writeTo(out) out.writeStringCollection(triggeredDocs) - out.writeMap(actionResultsMap as Map) + out.writeInt(actionResultsMap.size) + actionResultsMap.forEach { (alert, actionResults) -> + out.writeString(alert) + out.writeInt(actionResults.size) + actionResults.forEach { (id, result) -> + out.writeString(id) + result.writeTo(out) + } + } } companion object { @@ -48,5 +56,27 @@ data class DocumentLevelTriggerRunResult( fun readFrom(sin: StreamInput): TriggerRunResult { return DocumentLevelTriggerRunResult(sin) } + + @JvmStatic + fun readActionResults(sin: StreamInput): MutableMap> { + val actionResultsMapReconstruct: MutableMap> = mutableMapOf() + val size = sin.readInt() + var idx = 0 + while (idx < size) { + val alert = sin.readString() + val actionResultsSize = sin.readInt() + val actionRunResultElem = mutableMapOf() + var i = 0 + while (i < actionResultsSize) { + val actionId = sin.readString() + val actionResult = ActionRunResult.readFrom(sin) + actionRunResultElem[actionId] = actionResult + ++i + } + actionResultsMapReconstruct[alert] = actionRunResultElem + ++idx + } + return actionResultsMapReconstruct + } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/model/IndexExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/model/IndexExecutionContext.kt index e7aa707f9..b36460d96 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/model/IndexExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/model/IndexExecutionContext.kt @@ -6,14 +6,62 @@ package org.opensearch.alerting.model import org.opensearch.commons.alerting.model.DocLevelQuery +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.common.io.stream.Writeable +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException -/** DTO that contains all the necessary context for fetching data from shard and performing percolate queries */ data class IndexExecutionContext( val queries: List, - val lastRunContext: MutableMap, - val updatedLastRunContext: MutableMap, + val lastRunContext: MutableMap, // previous execution + val updatedLastRunContext: MutableMap, // without sequence numbers val indexName: String, val concreteIndexName: String, + val updatedIndexNames: List, + val concreteIndexNames: List, val conflictingFields: List, - val docIds: List? = null, -) + val docIds: List? = emptyList(), +) : Writeable, ToXContent { + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + queries = sin.readList { DocLevelQuery(sin) }, + lastRunContext = sin.readMap(), + updatedLastRunContext = sin.readMap(), + indexName = sin.readString(), + concreteIndexName = sin.readString(), + updatedIndexNames = sin.readStringList(), + concreteIndexNames = sin.readStringList(), + conflictingFields = sin.readStringList(), + docIds = sin.readOptionalStringList() + ) + + override fun writeTo(out: StreamOutput?) { + out!!.writeCollection(queries) + out.writeMap(lastRunContext) + out.writeMap(updatedLastRunContext) + out.writeString(indexName) + out.writeString(concreteIndexName) + out.writeStringCollection(updatedIndexNames) + out.writeStringCollection(concreteIndexNames) + out.writeStringCollection(conflictingFields) + out.writeOptionalStringCollection(docIds) + } + + override fun toXContent(builder: XContentBuilder?, params: ToXContent.Params?): XContentBuilder { + builder!!.startObject() + .field("queries", queries) + .field("last_run_context", lastRunContext) + .field("updated_last_run_context", updatedLastRunContext) + .field("index_name", indexName) + .field("concrete_index_name", concreteIndexName) + .field("udpated_index_names", updatedIndexNames) + .field("concrete_index_names", concreteIndexNames) + .field("conflicting_fields", conflictingFields) + .field("doc_ids", docIds) + .endObject() + return builder + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt index 8f6eb31b7..7a2a8aa48 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt @@ -21,6 +21,7 @@ class AlertingSettings { const val DEFAULT_PERCOLATE_QUERY_NUM_DOCS_IN_MEMORY = 50000 const val DEFAULT_PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT = 10 const val DEFAULT_DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE = 10000 + const val DEFAULT_FAN_OUT_NODES = 1000 val ALERTING_MAX_MONITORS = Setting.intSetting( "plugins.alerting.monitor.max_monitors", @@ -213,5 +214,13 @@ class AlertingSettings { false, Setting.Property.NodeScope, Setting.Property.Dynamic ) + + val DOC_LEVEL_MONITOR_FAN_OUT_NODES = Setting.intSetting( + "plugins.alerting.monitor.doc_level_monitor_fan_out_nodes", + DEFAULT_FAN_OUT_NODES, + 1, + Int.MAX_VALUE, + Setting.Property.NodeScope, Setting.Property.Dynamic + ) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDocLevelMonitorFanOutAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDocLevelMonitorFanOutAction.kt new file mode 100644 index 000000000..2e0a3811e --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDocLevelMonitorFanOutAction.kt @@ -0,0 +1,1183 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.transport + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.OpenSearchSecurityException +import org.opensearch.OpenSearchStatusException +import org.opensearch.action.DocWriteRequest +import org.opensearch.action.admin.indices.refresh.RefreshAction +import org.opensearch.action.admin.indices.refresh.RefreshRequest +import org.opensearch.action.bulk.BackoffPolicy +import org.opensearch.action.bulk.BulkRequest +import org.opensearch.action.bulk.BulkResponse +import org.opensearch.action.get.MultiGetItemResponse +import org.opensearch.action.get.MultiGetRequest +import org.opensearch.action.index.IndexRequest +import org.opensearch.action.search.SearchAction +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse +import org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.AlertService +import org.opensearch.alerting.MonitorRunnerService +import org.opensearch.alerting.TriggerService +import org.opensearch.alerting.action.DocLevelMonitorFanOutAction +import org.opensearch.alerting.action.DocLevelMonitorFanOutRequest +import org.opensearch.alerting.action.DocLevelMonitorFanOutResponse +import org.opensearch.alerting.action.GetDestinationsAction +import org.opensearch.alerting.action.GetDestinationsRequest +import org.opensearch.alerting.action.GetDestinationsResponse +import org.opensearch.alerting.model.ActionRunResult +import org.opensearch.alerting.model.AlertContext +import org.opensearch.alerting.model.DocumentLevelTriggerRunResult +import org.opensearch.alerting.model.IndexExecutionContext +import org.opensearch.alerting.model.InputRunResults +import org.opensearch.alerting.model.MonitorMetadata +import org.opensearch.alerting.model.MonitorRunResult +import org.opensearch.alerting.model.destination.Destination +import org.opensearch.alerting.model.destination.DestinationContextFactory +import org.opensearch.alerting.model.userErrorMessage +import org.opensearch.alerting.opensearchapi.InjectorContextElement +import org.opensearch.alerting.opensearchapi.convertToMap +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.opensearchapi.withClosableContext +import org.opensearch.alerting.script.DocumentLevelTriggerExecutionContext +import org.opensearch.alerting.script.QueryLevelTriggerExecutionContext +import org.opensearch.alerting.script.TriggerExecutionContext +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_COUNT +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_MILLIS +import org.opensearch.alerting.settings.AlertingSettings.Companion.DOC_LEVEL_MONITOR_FETCH_ONLY_QUERY_FIELDS_ENABLED +import org.opensearch.alerting.settings.AlertingSettings.Companion.DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE +import org.opensearch.alerting.settings.AlertingSettings.Companion.FINDINGS_INDEXING_BATCH_SIZE +import org.opensearch.alerting.settings.AlertingSettings.Companion.MAX_ACTIONABLE_ALERT_COUNT +import org.opensearch.alerting.settings.AlertingSettings.Companion.PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT +import org.opensearch.alerting.settings.AlertingSettings.Companion.PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY +import org.opensearch.alerting.settings.DestinationSettings +import org.opensearch.alerting.util.AlertingException +import org.opensearch.alerting.util.defaultToPerExecutionAction +import org.opensearch.alerting.util.destinationmigration.NotificationActionConfigs +import org.opensearch.alerting.util.destinationmigration.NotificationApiUtils +import org.opensearch.alerting.util.destinationmigration.getTitle +import org.opensearch.alerting.util.destinationmigration.publishLegacyNotification +import org.opensearch.alerting.util.destinationmigration.sendNotification +import org.opensearch.alerting.util.getActionExecutionPolicy +import org.opensearch.alerting.util.isAllowed +import org.opensearch.alerting.util.isTestAction +import org.opensearch.alerting.util.parseSampleDocTags +import org.opensearch.alerting.util.printsSampleDocData +import org.opensearch.alerting.workflow.WorkflowRunContext +import org.opensearch.client.Client +import org.opensearch.client.node.NodeClient +import org.opensearch.cluster.routing.Preference +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.AlertingPluginInterface +import org.opensearch.commons.alerting.action.PublishFindingsRequest +import org.opensearch.commons.alerting.action.SubscribeFindingsResponse +import org.opensearch.commons.alerting.model.ActionExecutionResult +import org.opensearch.commons.alerting.model.Alert +import org.opensearch.commons.alerting.model.DocLevelMonitorInput +import org.opensearch.commons.alerting.model.DocLevelQuery +import org.opensearch.commons.alerting.model.DocumentLevelTrigger +import org.opensearch.commons.alerting.model.Finding +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.Table +import org.opensearch.commons.alerting.model.action.Action +import org.opensearch.commons.alerting.model.action.PerAlertActionScope +import org.opensearch.commons.alerting.util.string +import org.opensearch.commons.notifications.model.NotificationConfigInfo +import org.opensearch.core.action.ActionListener +import org.opensearch.core.common.Strings +import org.opensearch.core.common.bytes.BytesReference +import org.opensearch.core.rest.RestStatus +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import org.opensearch.index.query.BoolQueryBuilder +import org.opensearch.index.query.Operator +import org.opensearch.index.query.QueryBuilders +import org.opensearch.index.seqno.SequenceNumbers +import org.opensearch.indices.IndexClosedException +import org.opensearch.monitor.jvm.JvmStats +import org.opensearch.percolator.PercolateQueryBuilderExt +import org.opensearch.script.Script +import org.opensearch.script.ScriptService +import org.opensearch.script.TemplateScript +import org.opensearch.search.SearchHit +import org.opensearch.search.SearchHits +import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.search.fetch.subphase.FetchSourceContext +import org.opensearch.search.sort.SortOrder +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import java.io.IOException +import java.time.Instant +import java.util.UUID +import java.util.stream.Collectors + +private val log = LogManager.getLogger(TransportDocLevelMonitorFanOutAction::class.java) +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) + +class TransportDocLevelMonitorFanOutAction +@Inject constructor( + transportService: TransportService, + val client: Client, + val actionFilters: ActionFilters, + val clusterService: ClusterService, + val triggerService: TriggerService, + val alertService: AlertService, + val scriptService: ScriptService, + val settings: Settings, + val xContentRegistry: NamedXContentRegistry +) : HandledTransportAction( + DocLevelMonitorFanOutAction.NAME, transportService, actionFilters, ::DocLevelMonitorFanOutRequest +), + SecureTransportAction { + var nonPercolateSearchesTimeTakenStat = 0L + var percolateQueriesTimeTakenStat = 0L + var totalDocsQueriedStat = 0L + var docTransformTimeTakenStat = 0L + var totalDocsSizeInBytesStat = 0L + var docsSizeOfBatchInBytes = 0L + var findingsToTriggeredQueries: Map> = mutableMapOf() + + @Volatile var percQueryMaxNumDocsInMemory: Int = PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY.get(settings) + @Volatile var percQueryDocsSizeMemoryPercentageLimit: Int = PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT.get(settings) + @Volatile var docLevelMonitorShardFetchSize: Int = DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE.get(settings) + @Volatile var findingsIndexBatchSize: Int = FINDINGS_INDEXING_BATCH_SIZE.get(settings) + @Volatile var maxActionableAlertCount: Long = MAX_ACTIONABLE_ALERT_COUNT.get(settings) + @Volatile var retryPolicy = BackoffPolicy.constantBackoff(ALERT_BACKOFF_MILLIS.get(settings), ALERT_BACKOFF_COUNT.get(settings)) + @Volatile var allowList: List = DestinationSettings.ALLOW_LIST.get(settings) + @Volatile var fetchOnlyQueryFieldNames = DOC_LEVEL_MONITOR_FETCH_ONLY_QUERY_FIELDS_ENABLED.get(settings) + + init { + clusterService.clusterSettings.addSettingsUpdateConsumer(PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY) { + percQueryMaxNumDocsInMemory = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT) { + percQueryDocsSizeMemoryPercentageLimit = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE) { + docLevelMonitorShardFetchSize = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(FINDINGS_INDEXING_BATCH_SIZE) { + findingsIndexBatchSize = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(MAX_ACTIONABLE_ALERT_COUNT) { + maxActionableAlertCount = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERT_BACKOFF_MILLIS, ALERT_BACKOFF_COUNT) { millis, count -> + retryPolicy = BackoffPolicy.constantBackoff(millis, count) + } + clusterService.clusterSettings.addSettingsUpdateConsumer(DestinationSettings.ALLOW_LIST) { + allowList = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(DOC_LEVEL_MONITOR_FETCH_ONLY_QUERY_FIELDS_ENABLED) { + fetchOnlyQueryFieldNames = it + } + } + + @Volatile + override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + override fun doExecute( + task: Task, + request: DocLevelMonitorFanOutRequest, + listener: ActionListener + ) { + scope.launch { + executeMonitor(request, listener) + } + } + + private suspend fun executeMonitor( + request: DocLevelMonitorFanOutRequest, + listener: ActionListener + ) { + try { + val monitor = request.monitor + var monitorResult = MonitorRunResult(monitor.name, Instant.now(), Instant.now()) + val updatedIndexNames = request.indexExecutionContext.updatedIndexNames + val monitorMetadata = request.monitorMetadata + val shardIds = request.shardIds + val indexExecutionContext = request.indexExecutionContext + val concreteIndicesSeenSoFar = request.concreteIndicesSeenSoFar + val dryrun = request.dryRun + val executionId = request.executionId + val workflowRunContext = request.workflowRunContext + + val queryToDocIds = mutableMapOf>() + val inputRunResults = mutableMapOf>() + val docsToQueries = mutableMapOf>() + val transformedDocs = mutableListOf>() + val findingIdToDocSource = mutableMapOf() + val isTempMonitor = dryrun || monitor.id == Monitor.NO_ID + + val docLevelMonitorInput = request.monitor.inputs[0] as DocLevelMonitorInput + val queries: List = docLevelMonitorInput.queries + val fieldsToBeQueried = mutableSetOf() + if (fetchOnlyQueryFieldNames) { + for (it in queries) { + if (it.queryFieldNames.isEmpty()) { + fieldsToBeQueried.clear() + log.debug( + "Monitor ${request.monitor.id} : " + + "Doc Level query ${it.id} : ${it.query} doesn't have queryFieldNames populated. " + + "Cannot optimize monitor to fetch only query-relevant fields. " + + "Querying entire doc source." + ) + break + } + fieldsToBeQueried.addAll(it.queryFieldNames) + } + if (fieldsToBeQueried.isNotEmpty()) { + log.debug( + "Monitor ${monitor.id} Querying only fields " + + "${fieldsToBeQueried.joinToString()} instead of entire _source of documents" + ) + } + } + + fetchShardDataAndMaybeExecutePercolateQueries( + monitor, + indexExecutionContext, + monitorMetadata, + inputRunResults, + docsToQueries, + updatedIndexNames, + concreteIndicesSeenSoFar, + ArrayList(fieldsToBeQueried), + shardIds.map { it.id }, + transformedDocs + ) { shard, maxSeqNo -> // function passed to update last run context with new max sequence number + indexExecutionContext.updatedLastRunContext[shard] = maxSeqNo + } + if (transformedDocs.isNotEmpty()) { + performPercolateQueryAndResetCounters( + monitor, + monitorMetadata, + updatedIndexNames, + concreteIndicesSeenSoFar, + inputRunResults, + docsToQueries, + transformedDocs + ) + } + monitorResult = monitorResult.copy(inputResults = InputRunResults(listOf(inputRunResults))) + + /* + populate the map queryToDocIds with pairs of + this fixes the issue of passing id, name, tags fields of DocLevelQuery object correctly to TriggerExpressionParser + */ + queries.forEach { + if (inputRunResults.containsKey(it.id)) { + queryToDocIds[it] = inputRunResults[it.id]!! + } + } + + val idQueryMap: Map = queries.associateBy { it.id } + + val triggerResults = mutableMapOf() + // If there are no triggers defined, we still want to generate findings + if (monitor.triggers.isEmpty()) { + if (dryrun == false && monitor.id != Monitor.NO_ID) { + createFindings(monitor, docsToQueries, idQueryMap, true) + } + } else { + monitor.triggers.forEach { + triggerResults[it.id] = runForEachDocTrigger( + monitorResult, + it as DocumentLevelTrigger, + monitor, + idQueryMap, + docsToQueries, + queryToDocIds, + dryrun, + executionId = executionId, + findingIdToDocSource, + workflowRunContext = workflowRunContext + ) + } + } + + if (!isTempMonitor) { + // If any error happened during trigger execution, upsert monitor error alert + val errorMessage = constructErrorMessageFromTriggerResults(triggerResults = triggerResults) + log.info(errorMessage) + if (errorMessage.isNotEmpty()) { + alertService.upsertMonitorErrorAlert( + monitor = monitor, + errorMessage = errorMessage, + executionId = executionId, + workflowRunContext + ) + } else { + onSuccessfulMonitorRun(monitor) + } + } + + listener.onResponse( + DocLevelMonitorFanOutResponse( + nodeId = clusterService.localNode().id, + executionId = request.executionId, + monitorId = monitor.id, + indexExecutionContext.updatedLastRunContext, + InputRunResults(listOf(inputRunResults)), + triggerResults + ) + ) + } catch (e: Exception) { + log.error("${request.monitor.id} Failed to run fan_out on node ${clusterService.localNode().id} due to error $e") + listener.onFailure(AlertingException.wrap(e)) + } + } + + private suspend fun runForEachDocTrigger( + monitorResult: MonitorRunResult, + trigger: DocumentLevelTrigger, + monitor: Monitor, + idQueryMap: Map, + docsToQueries: MutableMap>, + queryToDocIds: Map>, + dryrun: Boolean, + executionId: String, + findingIdToDocSource: MutableMap, + workflowRunContext: WorkflowRunContext? + ): DocumentLevelTriggerRunResult { + val triggerCtx = DocumentLevelTriggerExecutionContext(monitor, trigger) + val triggerResult = triggerService.runDocLevelTrigger(monitor, trigger, queryToDocIds) + + val triggerFindingDocPairs = mutableListOf>() + + // TODO: Implement throttling for findings + val findingToDocPairs = createFindings( + monitor, + docsToQueries, + idQueryMap, + !dryrun && monitor.id != Monitor.NO_ID, + executionId + ) + + findingToDocPairs.forEach { + // Only pick those entries whose docs have triggers associated with them + if (triggerResult.triggeredDocs.contains(it.second)) { + triggerFindingDocPairs.add(Pair(it.first, it.second)) + } + } + + val actionCtx = triggerCtx.copy( + triggeredDocs = triggerResult.triggeredDocs, + relatedFindings = findingToDocPairs.map { it.first }, + error = monitorResult.error ?: triggerResult.error + ) + + if (printsSampleDocData(trigger) && triggerFindingDocPairs.isNotEmpty()) + getDocSources( + findingToDocPairs = findingToDocPairs, + monitor = monitor, + findingIdToDocSource = findingIdToDocSource + ) + + val alerts = mutableListOf() + val alertContexts = mutableListOf() + triggerFindingDocPairs.forEach { + val alert = alertService.composeDocLevelAlert( + listOf(it.first), + listOf(it.second), + triggerCtx, + monitorResult.alertError() ?: triggerResult.alertError(), + executionId = executionId, + workflorwRunContext = workflowRunContext + ) + alerts.add(alert) + + val docSource = findingIdToDocSource[alert.findingIds.first()]?.response?.convertToMap() + + alertContexts.add( + AlertContext( + alert = alert, + associatedQueries = alert.findingIds.flatMap { findingId -> + findingsToTriggeredQueries.getOrDefault(findingId, emptyList()) ?: emptyList() + }, + sampleDocs = listOfNotNull(docSource) + ) + ) + } + + val shouldDefaultToPerExecution = defaultToPerExecutionAction( + maxActionableAlertCount, + monitorId = monitor.id, + triggerId = trigger.id, + totalActionableAlertCount = alerts.size, + monitorOrTriggerError = actionCtx.error + ) + + for (action in trigger.actions) { + val actionExecutionScope = action.getActionExecutionPolicy(monitor)!!.actionExecutionScope + if (actionExecutionScope is PerAlertActionScope && !shouldDefaultToPerExecution) { + for (alertContext in alertContexts) { + val actionResults = this.runAction(action, actionCtx.copy(alerts = listOf(alertContext)), monitor, dryrun) + triggerResult.actionResultsMap.getOrPut(alertContext.alert.id) { mutableMapOf() } + triggerResult.actionResultsMap[alertContext.alert.id]?.set(action.id, actionResults) + } + } else if (alertContexts.isNotEmpty()) { + val actionResults = this.runAction(action, actionCtx.copy(alerts = alertContexts), monitor, dryrun) + for (alert in alerts) { + triggerResult.actionResultsMap.getOrPut(alert.id) { mutableMapOf() } + triggerResult.actionResultsMap[alert.id]?.set(action.id, actionResults) + } + } + } + + // Alerts are saved after the actions since if there are failures in the actions, they can be stated in the alert + if (!dryrun && monitor.id != Monitor.NO_ID) { + val updatedAlerts = alerts.map { alert -> + val actionResults = triggerResult.actionResultsMap.getOrDefault(alert.id, emptyMap()) + val actionExecutionResults = actionResults.values.map { actionRunResult -> + ActionExecutionResult(actionRunResult.actionId, actionRunResult.executionTime, if (actionRunResult.throttled) 1 else 0) + } + alert.copy(actionExecutionResults = actionExecutionResults) + } + + retryPolicy.let { + alertService.saveAlerts( + monitor.dataSources, + updatedAlerts, + it, + routingId = monitor.id + ) + } + } + return triggerResult + } + + /** + * 1. Bulk index all findings based on shouldCreateFinding flag + * 2. invoke publishFinding() to kickstart auto-correlations + * 3. Returns a list of pairs for finding id to doc id + */ + private suspend fun createFindings( + monitor: Monitor, + docsToQueries: MutableMap>, + idQueryMap: Map, + shouldCreateFinding: Boolean, + workflowExecutionId: String? = null, + ): List> { + + val findingDocPairs = mutableListOf>() + val findings = mutableListOf() + val indexRequests = mutableListOf() + val findingsToTriggeredQueries = mutableMapOf>() + + docsToQueries.forEach { + val triggeredQueries = it.value.map { queryId -> idQueryMap[queryId]!! } + + // Before the "|" is the doc id and after the "|" is the index + val docIndex = it.key.split("|") + + val finding = Finding( + id = UUID.randomUUID().toString(), + relatedDocIds = listOf(docIndex[0]), + correlatedDocIds = listOf(docIndex[0]), + monitorId = monitor.id, + monitorName = monitor.name, + index = docIndex[1], + docLevelQueries = triggeredQueries, + timestamp = Instant.now(), + executionId = workflowExecutionId + ) + findingDocPairs.add(Pair(finding.id, it.key)) + findings.add(finding) + findingsToTriggeredQueries[finding.id] = triggeredQueries + + val findingStr = + finding.toXContent(XContentBuilder.builder(XContentType.JSON.xContent()), ToXContent.EMPTY_PARAMS) + .string() + log.debug("Findings: $findingStr") + + if (shouldCreateFinding) { + indexRequests += IndexRequest(monitor.dataSources.findingsIndex) + .source(findingStr, XContentType.JSON) + .id(finding.id) + .opType(DocWriteRequest.OpType.CREATE) + } + } + + if (indexRequests.isNotEmpty()) { + bulkIndexFindings(monitor, indexRequests) + } + + try { + findings.forEach { finding -> + publishFinding(monitor, finding) + } + } catch (e: Exception) { + // suppress exception + log.error("Optional finding callback failed", e) + } + this.findingsToTriggeredQueries += findingsToTriggeredQueries + + return findingDocPairs + } + + private suspend fun bulkIndexFindings( + monitor: Monitor, + indexRequests: List + ) { + indexRequests.chunked(findingsIndexBatchSize).forEach { batch -> + val bulkResponse: BulkResponse = client.suspendUntil { + bulk(BulkRequest().add(batch), it) + } + if (bulkResponse.hasFailures()) { + bulkResponse.items.forEach { item -> + if (item.isFailed) { + log.error("Failed indexing the finding ${item.id} of monitor [${monitor.id}]") + } + } + } else { + log.debug("[${bulkResponse.items.size}] All findings successfully indexed.") + } + } + client.execute(RefreshAction.INSTANCE, RefreshRequest(monitor.dataSources.findingsIndex)) + } + + private fun publishFinding( + monitor: Monitor, + finding: Finding + ) { + val publishFindingsRequest = PublishFindingsRequest(monitor.id, finding) + AlertingPluginInterface.publishFinding( + client as NodeClient, + publishFindingsRequest, + object : ActionListener { + override fun onResponse(response: SubscribeFindingsResponse) {} + + override fun onFailure(e: Exception) {} + } + ) + } + + suspend fun runAction( + action: Action, + ctx: TriggerExecutionContext, + monitor: Monitor, + dryrun: Boolean + ): ActionRunResult { + return try { + if (ctx is QueryLevelTriggerExecutionContext && !MonitorRunnerService.isActionActionable(action, ctx.alert)) { + return ActionRunResult(action.id, action.name, mapOf(), true, null, null) + } + val actionOutput = mutableMapOf() + actionOutput[Action.SUBJECT] = if (action.subjectTemplate != null) + compileTemplate(action.subjectTemplate!!, ctx) + else "" + actionOutput[Action.MESSAGE] = compileTemplate(action.messageTemplate, ctx) + if (Strings.isNullOrEmpty(actionOutput[Action.MESSAGE])) { + throw IllegalStateException("Message content missing in the Destination with id: ${action.destinationId}") + } + if (!dryrun) { + client.threadPool().threadContext.stashContext().use { + withClosableContext( + InjectorContextElement( + monitor.id, + settings, + client.threadPool().threadContext, + monitor.user?.roles, + monitor.user + ) + ) { + actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( + action, + actionOutput[Action.SUBJECT], + actionOutput[Action.MESSAGE]!! + ) + } + } + } + ActionRunResult( + action.id, + action.name, + actionOutput, + false, + Instant.ofEpochMilli(client.threadPool().absoluteTimeInMillis()), + null + ) + } catch (e: Exception) { + ActionRunResult(action.id, action.name, mapOf(), false, Instant.ofEpochMilli(client.threadPool().absoluteTimeInMillis()), e) + } + } + + protected suspend fun getConfigAndSendNotification( + action: Action, + subject: String?, + message: String + ): String { + val config = getConfigForNotificationAction(action) + if (config.destination == null && config.channel == null) { + throw IllegalStateException("Unable to find a Notification Channel or Destination config with id [${action.destinationId}]") + } + + // Adding a check on TEST_ACTION Destination type here to avoid supporting it as a LegacyBaseMessage type + // just for Alerting integration tests + if (config.destination?.isTestAction() == true) { + return "test action" + } + + if (config.destination?.isAllowed(allowList) == false) { + throw IllegalStateException( + "Monitor contains a Destination type that is not allowed: ${config.destination.type}" + ) + } + + var actionResponseContent = "" + actionResponseContent = config.channel + ?.sendNotification( + client, + config.channel.getTitle(subject), + message + ) ?: actionResponseContent + + actionResponseContent = config.destination + ?.buildLegacyBaseMessage(subject, message, getDestinationContextFactory().getDestinationContext(config.destination)) + ?.publishLegacyNotification(client) + ?: actionResponseContent + + return actionResponseContent + } + + /** 1. Fetch data per shard for given index. (only 10000 docs are fetched. + * needs to be converted to scroll if not performant enough) + * 2. Transform documents to conform to format required for percolate query + * 3a. Check if docs in memory are crossing threshold defined by setting. + * 3b. If yes, perform percolate query and update docToQueries Map with all hits from percolate queries */ + private suspend fun fetchShardDataAndMaybeExecutePercolateQueries( + monitor: Monitor, + indexExecutionCtx: IndexExecutionContext, + monitorMetadata: MonitorMetadata, + inputRunResults: MutableMap>, + docsToQueries: MutableMap>, + monitorInputIndices: List, + concreteIndices: List, + fieldsToBeQueried: List, + shardList: List, + transformedDocs: MutableList>, + updateLastRunContext: (String, String) -> Unit + ) { + for (shardId in shardList) { + val shard = shardId.toString() + try { + val prevSeqNo = indexExecutionCtx.lastRunContext[shard].toString().toLongOrNull() + val from = prevSeqNo ?: SequenceNumbers.NO_OPS_PERFORMED + var to: Long = Long.MAX_VALUE + while (to >= from) { + val hits: SearchHits = searchShard( + indexExecutionCtx.concreteIndexName, + shard, + from, + to, + indexExecutionCtx.docIds, + fieldsToBeQueried, + ) + if (hits.hits.isEmpty()) { + if (to == Long.MAX_VALUE) { + updateLastRunContext(shard, (prevSeqNo ?: SequenceNumbers.NO_OPS_PERFORMED).toString()) // didn't find any docs + } + break + } + if (to == Long.MAX_VALUE) { // max sequence number of shard needs to be computed + updateLastRunContext(shard, hits.hits[0].seqNo.toString()) + } + val leastSeqNoFromHits = hits.hits.last().seqNo + to = leastSeqNoFromHits - 1 + val startTime = System.currentTimeMillis() + transformedDocs.addAll( + transformSearchHitsAndReconstructDocs( + hits, + indexExecutionCtx.indexName, + indexExecutionCtx.concreteIndexName, + monitor.id, + indexExecutionCtx.conflictingFields, + ) + ) + if ( + transformedDocs.isNotEmpty() && + shouldPerformPercolateQueryAndFlushInMemoryDocs(transformedDocs.size) + ) { + performPercolateQueryAndResetCounters( + monitor, + monitorMetadata, + monitorInputIndices, + concreteIndices, + inputRunResults, + docsToQueries, + transformedDocs + ) + } + docTransformTimeTakenStat += System.currentTimeMillis() - startTime + } + } catch (e: Exception) { + log.error( + "Monitor ${monitor.id} :" + + "Failed to run fetch data from shard [$shard] of index [${indexExecutionCtx.concreteIndexName}]. " + + "Error: ${e.message}", + e + ) + if (e is IndexClosedException) { + throw e + } + } + if ( + transformedDocs.isNotEmpty() && + shouldPerformPercolateQueryAndFlushInMemoryDocs(transformedDocs.size) + ) { + performPercolateQueryAndResetCounters( + monitor, + monitorMetadata, + monitorInputIndices, + concreteIndices, + inputRunResults, + docsToQueries, + transformedDocs + ) + } + } + } + + private suspend fun performPercolateQueryAndResetCounters( + monitor: Monitor, + monitorMetadata: MonitorMetadata, + monitorInputIndices: List, + concreteIndices: List, + inputRunResults: MutableMap>, + docsToQueries: MutableMap>, + transformedDocs: MutableList> + ) { + try { + val percolateQueryResponseHits = runPercolateQueryOnTransformedDocs( + transformedDocs, + monitor, + monitorMetadata, + concreteIndices, + monitorInputIndices, + ) + + percolateQueryResponseHits.forEach { hit -> + var id = hit.id + concreteIndices.forEach { id = id.replace("_${it}_${monitor.id}", "") } + monitorInputIndices.forEach { id = id.replace("_${it}_${monitor.id}", "") } + val docIndices = hit.field("_percolator_document_slot").values.map { it.toString().toInt() } + docIndices.forEach { idx -> + val docIndex = "${transformedDocs[idx].first}|${transformedDocs[idx].second.concreteIndexName}" + inputRunResults.getOrPut(id) { mutableSetOf() }.add(docIndex) + docsToQueries.getOrPut(docIndex) { mutableListOf() }.add(id) + } + } + totalDocsQueriedStat += transformedDocs.size.toLong() + } finally { + transformedDocs.clear() + docsSizeOfBatchInBytes = 0 + } + } + + /** Executes percolate query on the docs against the monitor's query index and return the hits from the search response*/ + private suspend fun runPercolateQueryOnTransformedDocs( + docs: MutableList>, + monitor: Monitor, + monitorMetadata: MonitorMetadata, + concreteIndices: List, + monitorInputIndices: List, + ): SearchHits { + val indices = docs.stream().map { it.second.indexName }.distinct().collect(Collectors.toList()) + val boolQueryBuilder = BoolQueryBuilder().must(buildShouldClausesOverPerIndexMatchQueries(indices)) + val percolateQueryBuilder = + PercolateQueryBuilderExt("query", docs.map { it.second.docSource }, XContentType.JSON) + if (monitor.id.isNotEmpty()) { + boolQueryBuilder.must(QueryBuilders.matchQuery("monitor_id", monitor.id).operator(Operator.AND)) + } + boolQueryBuilder.filter(percolateQueryBuilder) + val queryIndices = + docs.map { monitorMetadata.sourceToQueryIndexMapping[it.second.indexName + monitor.id] }.distinct() + if (queryIndices.isEmpty()) { + val message = + "Monitor ${monitor.id}: Failed to resolve query Indices from source indices during monitor execution!" + + " sourceIndices: $monitorInputIndices" + log.error(message) + throw AlertingException.wrap( + OpenSearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR) + ) + } + + val searchRequest = + SearchRequest().indices(*queryIndices.toTypedArray()).preference(Preference.PRIMARY_FIRST.type()) + val searchSourceBuilder = SearchSourceBuilder() + searchSourceBuilder.query(boolQueryBuilder) + searchRequest.source(searchSourceBuilder) + log.debug( + "Monitor ${monitor.id}: " + + "Executing percolate query for docs from source indices " + + "$monitorInputIndices against query index $queryIndices" + ) + var response: SearchResponse + try { + response = client.suspendUntil { + client.execute(SearchAction.INSTANCE, searchRequest, it) + } + } catch (e: Exception) { + throw IllegalStateException( + "Monitor ${monitor.id}:" + + " Failed to run percolate search for sourceIndex [${concreteIndices.joinToString()}] " + + "and queryIndex [${queryIndices.joinToString()}] for ${docs.size} document(s)", + e + ) + } + + if (response.status() !== RestStatus.OK) { + throw IOException( + "Monitor ${monitor.id}: Failed to search percolate index: ${queryIndices.joinToString()}. " + + "Response status is ${response.status()}" + ) + } + log.debug("Monitor ${monitor.id} PERF_DEBUG: Percolate query time taken millis = ${response.took}") + percolateQueriesTimeTakenStat += response.took.millis + return response.hits + } + + /** we cannot use terms query because `index` field's mapping is of type TEXT and not keyword. Refer doc-level-queries.json*/ + private fun buildShouldClausesOverPerIndexMatchQueries(indices: List): BoolQueryBuilder { + val boolQueryBuilder = QueryBuilders.boolQuery() + indices.forEach { boolQueryBuilder.should(QueryBuilders.matchQuery("index", it)) } + return boolQueryBuilder + } + + /** Executes search query on given shard of given index to fetch docs with sequene number greater than prevSeqNo. + * This method hence fetches only docs from shard which haven't been queried before + */ + private suspend fun searchShard( + index: String, + shard: String, + prevSeqNo: Long?, + maxSeqNo: Long, + docIds: List? = null, + fieldsToFetch: List, + ): SearchHits { + if (prevSeqNo?.equals(maxSeqNo) == true && maxSeqNo != 0L) { + return SearchHits.empty() + } + val boolQueryBuilder = BoolQueryBuilder() + boolQueryBuilder.filter(QueryBuilders.rangeQuery("_seq_no").gt(prevSeqNo).lte(maxSeqNo)) + + if (!docIds.isNullOrEmpty()) { + boolQueryBuilder.filter(QueryBuilders.termsQuery("_id", docIds)) + } + + val request: SearchRequest = SearchRequest() + .indices(index) + .preference("_shards:$shard") + .source( + SearchSourceBuilder() + .version(true) + .sort("_seq_no", SortOrder.DESC) + .seqNoAndPrimaryTerm(true) + .query(boolQueryBuilder) + .size(docLevelMonitorShardFetchSize) + ) + + if (fieldsToFetch.isNotEmpty() && fetchOnlyQueryFieldNames) { + request.source().fetchSource(false) + for (field in fieldsToFetch) { + request.source().fetchField(field) + } + } + val response: SearchResponse = client.suspendUntil { client.search(request, it) } + if (response.status() !== RestStatus.OK) { + throw IOException("Failed to search shard: [$shard] in index [$index]. Response status is ${response.status()}") + } + nonPercolateSearchesTimeTakenStat += response.took.millis + return response.hits + } + + /** Transform field names and index names in all the search hits to format required to run percolate search against them. + * Hits are transformed using method transformDocumentFieldNames() */ + private fun transformSearchHitsAndReconstructDocs( + hits: SearchHits, + index: String, + concreteIndex: String, + monitorId: String, + conflictingFields: List, + ): List> { + return hits.mapNotNull(fun(hit: SearchHit): Pair? { + try { + val sourceMap = if (hit.hasSource()) { + hit.sourceAsMap + } else { + constructSourceMapFromFieldsInHit(hit) + } + transformDocumentFieldNames( + sourceMap, + conflictingFields, + "_${index}_$monitorId", + "_${concreteIndex}_$monitorId", + "" + ) + var xContentBuilder = XContentFactory.jsonBuilder().map(sourceMap) + val sourceRef = BytesReference.bytes(xContentBuilder) + docsSizeOfBatchInBytes += sourceRef.ramBytesUsed() + totalDocsSizeInBytesStat += sourceRef.ramBytesUsed() + return Pair( + hit.id, + TransformedDocDto(index, concreteIndex, hit.id, sourceRef) + ) + } catch (e: Exception) { + log.error("Monitor $monitorId: Failed to transform payload $hit for percolate query", e) + // skip any document which we fail to transform because we anyway won't be able to run percolate queries on them. + return null + } + }) + } + + private fun constructSourceMapFromFieldsInHit(hit: SearchHit): MutableMap { + if (hit.fields == null) + return mutableMapOf() + val sourceMap: MutableMap = mutableMapOf() + for (field in hit.fields) { + if (field.value.values != null && field.value.values.isNotEmpty()) + if (field.value.values.size == 1) { + sourceMap[field.key] = field.value.values[0] + } else sourceMap[field.key] = field.value.values + } + return sourceMap + } + + /** + * Traverses document fields in leaves recursively and appends [fieldNameSuffixIndex] to field names with same names + * but different mappings & [fieldNameSuffixPattern] to field names which have unique names. + * + * Example for index name is my_log_index and Monitor ID is TReewWdsf2gdJFV: + * { { + * "a": { "a": { + * "b": 1234 ----> "b_my_log_index_TReewWdsf2gdJFV": 1234 + * } } + * } + * + * @param jsonAsMap Input JSON (as Map) + * @param fieldNameSuffix Field suffix which is appended to existing field name + */ + private fun transformDocumentFieldNames( + jsonAsMap: MutableMap, + conflictingFields: List, + fieldNameSuffixPattern: String, + fieldNameSuffixIndex: String, + fieldNamePrefix: String + ) { + val tempMap = mutableMapOf() + val it: MutableIterator> = jsonAsMap.entries.iterator() + while (it.hasNext()) { + val entry = it.next() + if (entry.value is Map<*, *>) { + transformDocumentFieldNames( + entry.value as MutableMap, + conflictingFields, + fieldNameSuffixPattern, + fieldNameSuffixIndex, + if (fieldNamePrefix == "") entry.key else "$fieldNamePrefix.${entry.key}" + ) + } else if (!entry.key.endsWith(fieldNameSuffixPattern) && !entry.key.endsWith(fieldNameSuffixIndex)) { + var alreadyReplaced = false + conflictingFields.forEach { conflictingField -> + if (conflictingField == "$fieldNamePrefix.${entry.key}" || (fieldNamePrefix == "" && conflictingField == entry.key)) { + tempMap["${entry.key}$fieldNameSuffixIndex"] = entry.value + it.remove() + alreadyReplaced = true + } + } + if (!alreadyReplaced) { + tempMap["${entry.key}$fieldNameSuffixPattern"] = entry.value + it.remove() + } + } + } + jsonAsMap.putAll(tempMap) + } + + private fun shouldPerformPercolateQueryAndFlushInMemoryDocs( + numDocs: Int + ): Boolean { + return isInMemoryDocsSizeExceedingMemoryLimit(docsSizeOfBatchInBytes) || + isInMemoryNumDocsExceedingMaxDocsPerPercolateQueryLimit(numDocs) + } + + /** + * Returns true, if the docs fetched from shards thus far amount to less than threshold + * amount of percentage (default:10. setting is dynamic and configurable) of the total heap size or not. + * + */ + private fun isInMemoryDocsSizeExceedingMemoryLimit(docsBytesSize: Long): Boolean { + var thresholdPercentage = percQueryDocsSizeMemoryPercentageLimit + val heapMaxBytes = JvmStats.jvmStats().mem.heapMax.bytes + val thresholdBytes = (thresholdPercentage.toDouble() / 100.0) * heapMaxBytes + + return docsBytesSize > thresholdBytes + } + + private fun isInMemoryNumDocsExceedingMaxDocsPerPercolateQueryLimit(numDocs: Int): Boolean { + var maxNumDocsThreshold = percQueryMaxNumDocsInMemory + return numDocs >= maxNumDocsThreshold + } + + /** + * Performs an mGet request to retrieve the documents associated with findings. + * + * When possible, this will only retrieve the document fields that are specifically + * referenced for printing in the mustache template. + */ + private suspend fun getDocSources( + findingToDocPairs: List>, + monitor: Monitor, + findingIdToDocSource: MutableMap + ) { + val docFieldTags = parseSampleDocTags(monitor.triggers) + val request = MultiGetRequest() + + // Perform mGet request in batches. + findingToDocPairs.chunked(findingsIndexBatchSize).forEach { batch -> + batch.forEach { (findingId, docIdAndIndex) -> + val docIdAndIndexSplit = docIdAndIndex.split("|") + val docId = docIdAndIndexSplit[0] + val concreteIndex = docIdAndIndexSplit[1] + if (findingId.isNotEmpty() && docId.isNotEmpty() && concreteIndex.isNotEmpty()) { + val docItem = MultiGetRequest.Item(concreteIndex, docId) + if (docFieldTags.isNotEmpty()) + docItem.fetchSourceContext(FetchSourceContext(true, docFieldTags.toTypedArray(), emptyArray())) + request.add(docItem) + } + val response = client.suspendUntil { client.multiGet(request, it) } + response.responses.forEach { item -> + findingIdToDocSource[findingId] = item + } + } + } + } + + /** + * The "destination" ID referenced in a Monitor Action could either be a Notification config or a Destination config + * depending on whether the background migration process has already migrated it from a Destination to a Notification config. + * + * To cover both of these cases, the Notification config will take precedence and if it is not found, the Destination will be retrieved. + */ + private suspend fun getConfigForNotificationAction( + action: Action + ): NotificationActionConfigs { + var destination: Destination? = null + var notificationPermissionException: Exception? = null + + var channel: NotificationConfigInfo? = null + try { + channel = + NotificationApiUtils.getNotificationConfigInfo(client as NodeClient, action.destinationId) + } catch (e: OpenSearchSecurityException) { + notificationPermissionException = e + } + + // If the channel was not found, try to retrieve the Destination + if (channel == null) { + destination = try { + val table = Table( + "asc", + "destination.name.keyword", + null, + 1, + 0, + null + ) + val getDestinationsRequest = GetDestinationsRequest( + action.destinationId, + 0L, + null, + table, + "ALL" + ) + + val getDestinationsResponse: GetDestinationsResponse = client.suspendUntil { + client.execute(GetDestinationsAction.INSTANCE, getDestinationsRequest, it) + } + getDestinationsResponse.destinations.firstOrNull() + } catch (e: IllegalStateException) { + // Catching the exception thrown when the Destination was not found so the NotificationActionConfigs object can be returned + null + } catch (e: OpenSearchSecurityException) { + if (notificationPermissionException != null) + throw notificationPermissionException + else + throw e + } + + if (destination == null && notificationPermissionException != null) + throw notificationPermissionException + } + + return NotificationActionConfigs(destination, channel) + } + + private fun getDestinationContextFactory(): DestinationContextFactory { + val destinationSettings = DestinationSettings.loadDestinationSettings(settings) + return DestinationContextFactory(client, xContentRegistry, destinationSettings) + } + + private fun compileTemplate(template: Script, ctx: TriggerExecutionContext): String { + return scriptService.compile(template, TemplateScript.CONTEXT) + .newInstance(template.params + mapOf("ctx" to ctx.asTemplateArg())) + .execute() + } + + private suspend fun onSuccessfulMonitorRun(monitor: Monitor) { + alertService.clearMonitorErrorAlert(monitor) + if (monitor.dataSources.alertsHistoryIndex != null) { + alertService.moveClearedErrorAlertsToHistory( + monitor.id, + monitor.dataSources.alertsIndex, + monitor.dataSources.alertsHistoryIndex!! + ) + } + } + + private fun constructErrorMessageFromTriggerResults( + triggerResults: MutableMap? = null + ): String { + var errorMessage = "" + if (triggerResults != null) { + val triggersErrorBuilder = StringBuilder() + triggerResults.forEach { + if (it.value.error != null) { + triggersErrorBuilder.append("[${it.key}]: [${it.value.error!!.userErrorMessage()}]").append(" | ") + } + } + if (triggersErrorBuilder.isNotEmpty()) { + errorMessage = "Trigger errors: $triggersErrorBuilder" + } + } + return errorMessage + } + + /** + * POJO holding information about each doc's concrete index, id, input index pattern/alias/datastream name + * and doc source. A list of these POJOs would be passed to percolate query execution logic. + */ + data class TransformedDocDto( + var indexName: String, + var concreteIndexName: String, + var docId: String, + var docSource: BytesReference + ) +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorAction.kt index 9a814bb90..cf07dd1b4 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorAction.kt @@ -46,7 +46,7 @@ private val log = LogManager.getLogger(TransportExecuteMonitorAction::class.java private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) class TransportExecuteMonitorAction @Inject constructor( - transportService: TransportService, + private val transportService: TransportService, private val client: Client, private val clusterService: ClusterService, private val runner: MonitorRunnerService, @@ -73,14 +73,20 @@ class TransportExecuteMonitorAction @Inject constructor( // stored on the threadContext set by the security plugin when using the Alerting plugin with the Security plugin. // runner.launch(ElasticThreadContextElement(client.threadPool().threadContext)) { runner.launch { - val (periodStart, periodEnd) = + val (periodStart, periodEnd) = if (execMonitorRequest.requestStart != null) { + Pair( + Instant.ofEpochMilli(execMonitorRequest.requestStart.millis), + Instant.ofEpochMilli(execMonitorRequest.requestEnd.millis) + ) + } else { monitor.schedule.getPeriodEndingAt(Instant.ofEpochMilli(execMonitorRequest.requestEnd.millis)) + } try { log.info( "Executing monitor from API - id: ${monitor.id}, type: ${monitor.monitorType.name}, " + "periodStart: $periodStart, periodEnd: $periodEnd, dryrun: ${execMonitorRequest.dryrun}" ) - val monitorRunResult = runner.runJob(monitor, periodStart, periodEnd, execMonitorRequest.dryrun) + val monitorRunResult = runner.runJob(monitor, periodStart, periodEnd, execMonitorRequest.dryrun, transportService) withContext(Dispatchers.IO) { actionListener.onResponse(ExecuteMonitorResponse(monitorRunResult)) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt index b29171f65..4749fef6f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt @@ -38,7 +38,7 @@ import java.time.Instant private val log = LogManager.getLogger(TransportExecuteWorkflowAction::class.java) class TransportExecuteWorkflowAction @Inject constructor( - transportService: TransportService, + private val transportService: TransportService, private val client: Client, private val runner: MonitorRunnerService, actionFilters: ActionFilters, @@ -58,15 +58,27 @@ class TransportExecuteWorkflowAction @Inject constructor( client.threadPool().threadContext.stashContext().use { val executeWorkflow = fun(workflow: Workflow) { runner.launch { - val (periodStart, periodEnd) = + val (periodStart, periodEnd) = if (execWorkflowRequest.requestStart != null) { + Pair( + Instant.ofEpochMilli(execWorkflowRequest.requestStart.millis), + Instant.ofEpochMilli(execWorkflowRequest.requestEnd.millis) + ) + } else { workflow.schedule.getPeriodEndingAt(Instant.ofEpochMilli(execWorkflowRequest.requestEnd.millis)) + } try { log.info( "Executing workflow from API - id: ${workflow.id}, periodStart: $periodStart, periodEnd: $periodEnd, " + "dryrun: ${execWorkflowRequest.dryrun}" ) val workflowRunResult = - MonitorRunnerService.runJob(workflow, periodStart, periodEnd, execWorkflowRequest.dryrun) + MonitorRunnerService.runJob( + workflow, + periodStart, + periodEnd, + execWorkflowRequest.dryrun, + transportService = transportService + ) withContext(Dispatchers.IO, { actionListener.onResponse( ExecuteWorkflowResponse( diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/util/AlertingException.kt b/alerting/src/main/kotlin/org/opensearch/alerting/util/AlertingException.kt index 4127afaa2..3ed7a8674 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/util/AlertingException.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/util/AlertingException.kt @@ -20,7 +20,7 @@ private val log = LogManager.getLogger(AlertingException::class.java) /** * Converts into a user friendly message. */ -class AlertingException(message: String, val status: RestStatus, ex: Exception) : OpenSearchException(message, ex) { +class AlertingException(message: String, val status: RestStatus, val ex: Exception) : OpenSearchException(message, ex) { override fun status(): RestStatus { return status @@ -69,5 +69,21 @@ class AlertingException(message: String, val status: RestStatus, ex: Exception) // Below logic is to set friendly message to error.root_cause.reason. return AlertingException(friendlyMsg, status, Exception("${ex.javaClass.name}: ${ex.message}")) } + + @JvmStatic + fun merge(vararg ex: AlertingException): AlertingException { + var friendlyMsg = "" + var unwrappedExceptionMsg = "" + ex.forEach { + if (friendlyMsg != "") { + friendlyMsg += ", ${it.message}" + unwrappedExceptionMsg += ", ${it.ex.message}" + } else { + friendlyMsg = it.message.orEmpty() + unwrappedExceptionMsg = "${it.ex.message}" + } + } + return AlertingException(friendlyMsg, ex.first().status, Exception(unwrappedExceptionMsg)) + } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt index e68896bc2..260113736 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt @@ -38,6 +38,7 @@ import org.opensearch.index.query.QueryBuilders import org.opensearch.index.query.QueryBuilders.boolQuery import org.opensearch.index.query.QueryBuilders.existsQuery import org.opensearch.index.query.QueryBuilders.termsQuery +import org.opensearch.transport.TransportService import java.time.Instant import java.time.LocalDateTime import java.time.ZoneOffset @@ -53,6 +54,7 @@ object CompositeWorkflowRunner : WorkflowRunner() { periodStart: Instant, periodEnd: Instant, dryRun: Boolean, + transportService: TransportService ): WorkflowRunResult { val workflowExecutionStartTime = Instant.now() @@ -134,7 +136,16 @@ object CompositeWorkflowRunner : WorkflowRunner() { try { dataSources = delegateMonitor.dataSources val delegateRunResult = - runDelegateMonitor(delegateMonitor, monitorCtx, periodStart, periodEnd, dryRun, workflowRunContext, executionId) + runDelegateMonitor( + delegateMonitor, + monitorCtx, + periodStart, + periodEnd, + dryRun, + workflowRunContext, + executionId, + transportService + ) resultList.add(delegateRunResult!!) } catch (ex: Exception) { logger.error("Error executing workflow delegate monitor ${delegate.monitorId}", ex) @@ -242,6 +253,7 @@ object CompositeWorkflowRunner : WorkflowRunner() { dryRun: Boolean, workflowRunContext: WorkflowRunContext, executionId: String, + transportService: TransportService ): MonitorRunResult<*>? { if (delegateMonitor.isBucketLevelMonitor()) { @@ -252,7 +264,8 @@ object CompositeWorkflowRunner : WorkflowRunner() { periodEnd, dryRun, workflowRunContext, - executionId + executionId, + transportService ) } else if (delegateMonitor.isDocLevelMonitor()) { return DocumentLevelMonitorRunner().runMonitor( @@ -262,7 +275,8 @@ object CompositeWorkflowRunner : WorkflowRunner() { periodEnd, dryRun, workflowRunContext, - executionId + executionId, + transportService ) } else if (delegateMonitor.isQueryLevelMonitor()) { return QueryLevelMonitorRunner.runMonitor( @@ -272,7 +286,8 @@ object CompositeWorkflowRunner : WorkflowRunner() { periodEnd, dryRun, workflowRunContext, - executionId + executionId, + transportService ) } else { throw AlertingException.wrap( diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt index 14488a16a..8ebb2be9c 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt @@ -5,11 +5,51 @@ package org.opensearch.alerting.workflow +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.common.io.stream.Writeable +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.ToXContentObject +import org.opensearch.core.xcontent.XContentBuilder + data class WorkflowRunContext( // In case of dry run it's random generated id, while in other cases it's workflowId val workflowId: String, val workflowMetadataId: String, val chainedMonitorId: String?, val matchingDocIdsPerIndex: Map>, - val auditDelegateMonitorAlerts: Boolean -) + val auditDelegateMonitorAlerts: Boolean, +) : Writeable, ToXContentObject { + companion object { + fun readFrom(sin: StreamInput): WorkflowRunContext { + return WorkflowRunContext(sin) + } + } + + constructor(sin: StreamInput) : this( + sin.readString(), + sin.readString(), + sin.readOptionalString(), + sin.readMap() as Map>, + sin.readBoolean() + ) + + override fun writeTo(out: StreamOutput) { + out.writeString(workflowId) + out.writeString(workflowMetadataId) + out.writeOptionalString(chainedMonitorId) + out.writeMap(matchingDocIdsPerIndex) + out.writeBoolean(auditDelegateMonitorAlerts) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder { + builder.startObject() + .field("workflow_id", workflowId) + .field("workflow_metadata_id", workflowMetadataId) + .field("chained_monitor_id", chainedMonitorId) + .field("matching_doc_ids_per_index", matchingDocIdsPerIndex) + .field("audit_delegate_monitor_alerts", auditDelegateMonitorAlerts) + .endObject() + return builder + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt index ce6ac23f9..83fbf7212 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt @@ -33,6 +33,7 @@ import org.opensearch.commons.notifications.model.NotificationConfigInfo import org.opensearch.core.common.Strings import org.opensearch.script.Script import org.opensearch.script.TemplateScript +import org.opensearch.transport.TransportService import java.time.Instant abstract class WorkflowRunner { @@ -41,7 +42,8 @@ abstract class WorkflowRunner { monitorCtx: MonitorRunnerExecutionContext, periodStart: Instant, periodEnd: Instant, - dryRun: Boolean + dryRun: Boolean, + transportService: TransportService ): WorkflowRunResult suspend fun runAction( diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt b/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt index 251ef98f7..4d4c9ff6f 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt @@ -921,6 +921,17 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { return response } + public fun indexDoc(client: RestClient, index: String, doc: String, refresh: Boolean = true): Response { + val requestBody = StringEntity(doc, APPLICATION_JSON) + val params = if (refresh) mapOf("refresh" to "true") else mapOf() + val response = client.makeRequest("POST", "$index/_doc?op_type=create", params, requestBody) + assertTrue( + "Unable to index doc: '${doc.take(15)}...' to index: '$index'", + listOf(RestStatus.OK, RestStatus.CREATED).contains(response.restStatus()) + ) + return response + } + protected fun deleteDoc(index: String, id: String, refresh: Boolean = true): Response { val params = if (refresh) mapOf("refresh" to "true") else mapOf() val response = client().makeRequest("DELETE", "$index/_doc/$id", params) @@ -944,6 +955,20 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { return index } + protected fun createTestIndex(index: String = randomAlphaOfLength(10).lowercase(Locale.ROOT), settings: Settings): String { + createIndex( + index, settings, + """ + "properties" : { + "test_strict_date_time" : { "type" : "date", "format" : "strict_date_time" }, + "test_field" : { "type" : "keyword" }, + "number" : { "type" : "keyword" } + } + """.trimIndent() + ) + return index + } + protected fun createTestIndex(index: String, mapping: String): String { createIndex(index, Settings.EMPTY, mapping.trimIndent()) return index @@ -1043,7 +1068,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { client().makeRequest("DELETE", "_data_stream/$datastream") } - protected fun createIndexAlias(alias: String, mappings: String?) { + protected fun createIndexAlias(alias: String, mappings: String?, setting: String? = "") { val indexPattern = "$alias*" var componentTemplateMappings = "\"properties\": {" + " \"netflow.destination_transport_port\":{ \"type\": \"long\" }," + @@ -1052,9 +1077,10 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { if (mappings != null) { componentTemplateMappings = mappings } - createComponentTemplateWithMappings( + createComponentTemplateWithMappingsAndSettings( "my_alias_component_template-$alias", - componentTemplateMappings + componentTemplateMappings, + setting ) createComposableIndexTemplate( "my_index_template_alias-$alias", @@ -1090,6 +1116,17 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { ) } + protected fun createComponentTemplateWithMappingsAndSettings(componentTemplateName: String, mappings: String?, setting: String?) { + val body = """{"template" : { "mappings": {$mappings}, "settings": {$setting} }}""" + client().makeRequest( + "PUT", + "_component_template/$componentTemplateName", + emptyMap(), + StringEntity(body, ContentType.APPLICATION_JSON), + BasicHeader("Content-Type", "application/json") + ) + } + protected fun createComposableIndexTemplate( templateName: String, indexPatterns: List, diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt index b9ab6289b..575ccacd9 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt @@ -14,6 +14,7 @@ import org.opensearch.alerting.core.lock.LockService import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.client.Response import org.opensearch.client.ResponseException +import org.opensearch.common.settings.Settings import org.opensearch.common.xcontent.json.JsonXContent import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.DataSources @@ -2644,10 +2645,108 @@ class DocumentMonitorRunnerIT : AlertingRestTestCase() { } } - @Suppress("UNCHECKED_CAST") - /** helper that returns a field in a json map whose values are all json objects */ - private fun Map.objectMap(key: String): Map> { - return this[key] as Map> + fun `test document-level monitor fanout which generates alerts and findings`() { + val testIndex = createTestIndex(settings = Settings.builder().put("number_of_shards", "7").build()) + val testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(MILLIS)) + val testDoc = """{ + "message" : "This is an error from IAD region", + "test_strict_date_time" : "$testTime", + "test_field" : "us-west-2" + }""" + + val docQuery = DocLevelQuery(query = "test_field:\"us-west-2\"", fields = listOf(), name = "3") + val docLevelInput = DocLevelMonitorInput("description", listOf(testIndex), listOf(docQuery)) + + val trigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val monitor = createMonitor( + randomDocumentLevelMonitor( + inputs = listOf(docLevelInput), + triggers = listOf(trigger), + enabled = true, + schedule = IntervalSchedule(1, ChronoUnit.MINUTES) + ) + ) + assertNotNull(monitor.id) + + indexDoc(testIndex, "1", testDoc) + indexDoc(testIndex, "2", testDoc) + indexDoc(testIndex, "4", testDoc) + indexDoc(testIndex, "5", testDoc) + indexDoc(testIndex, "6", testDoc) + indexDoc(testIndex, "7", testDoc) + + OpenSearchTestCase.waitUntil( + { searchFindings(monitor).size == 6 && searchAlertsWithFilter(monitor).size == 6 }, 2, TimeUnit.MINUTES + ) + + indexDoc(testIndex, "11", testDoc) + indexDoc(testIndex, "12", testDoc) + indexDoc(testIndex, "14", testDoc) + indexDoc(testIndex, "15", testDoc) + indexDoc(testIndex, "16", testDoc) + indexDoc(testIndex, "17", testDoc) + + OpenSearchTestCase.waitUntil( + { searchFindings(monitor).size == 6 && searchAlertsWithFilter(monitor).size == 6 }, 2, TimeUnit.MINUTES + ) + } + + fun `test document-level monitor fanout when aliases contain indices with multiple shards`() { + val aliasName = "test-alias" + createIndexAlias( + aliasName, + """ + "properties" : { + "test_strict_date_time" : { "type" : "date", "format" : "strict_date_time" }, + "test_field" : { "type" : "keyword" }, + "number" : { "type" : "keyword" } + } + """.trimIndent(), + "\"index.number_of_shards\": 7" + ) + + val docQuery = DocLevelQuery(query = "test_field:\"us-west-2\"", fields = listOf(), name = "3") + val docLevelInput = DocLevelMonitorInput("description", listOf(aliasName), listOf(docQuery)) + + val action = randomAction(template = randomTemplateScript("Hello {{ctx.monitor.name}}"), destinationId = createDestination().id) + val monitor = createMonitor( + randomDocumentLevelMonitor( + inputs = listOf(docLevelInput), + triggers = listOf(randomDocumentLevelTrigger(condition = ALWAYS_RUN, actions = listOf(action))), + enabled = true, + schedule = IntervalSchedule(1, ChronoUnit.MINUTES) + ) + ) + + val testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(MILLIS)) + val testDoc = """{ + "@timestamp": "$testTime", + "message" : "This is an error from IAD region", + "test_strict_date_time" : "$testTime", + "test_field" : "us-west-2" + }""" + indexDoc(aliasName, "1", testDoc) + indexDoc(aliasName, "2", testDoc) + indexDoc(aliasName, "4", testDoc) + indexDoc(aliasName, "5", testDoc) + indexDoc(aliasName, "6", testDoc) + indexDoc(aliasName, "7", testDoc) + OpenSearchTestCase.waitUntil( + { searchFindings(monitor).size == 6 }, 2, TimeUnit.MINUTES + ) + + rolloverDatastream(aliasName) + indexDoc(aliasName, "11", testDoc) + indexDoc(aliasName, "12", testDoc) + indexDoc(aliasName, "14", testDoc) + indexDoc(aliasName, "15", testDoc) + indexDoc(aliasName, "16", testDoc) + indexDoc(aliasName, "17", testDoc) + OpenSearchTestCase.waitUntil( + { searchFindings(monitor).size == 6 }, 2, TimeUnit.MINUTES + ) + + deleteDataStream(aliasName) } fun `test execute monitor with non-null owner`() { diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt index 963d659c3..c4f3b0b03 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt @@ -844,7 +844,10 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { .get() Assert.assertTrue(getAlertsResponse != null) Assert.assertTrue(getAlertsResponse.alerts.size == 1) - Assert.assertTrue(getAlertsResponse.alerts[0].errorMessage == "IndexClosedException[closed]") + Assert.assertTrue( + getAlertsResponse.alerts[0].errorMessage == + "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; " + ) // Reopen index client().admin().indices().open(OpenIndexRequest(index)).get() // Close queryIndex @@ -862,7 +865,10 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { .get() Assert.assertTrue(getAlertsResponse != null) Assert.assertTrue(getAlertsResponse.alerts.size == 1) - Assert.assertTrue(getAlertsResponse.alerts[0].errorHistory[0].message == "IndexClosedException[closed]") + Assert.assertTrue( + getAlertsResponse.alerts[0].errorHistory[0].message == + "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; " + ) Assert.assertEquals(1, getAlertsResponse.alerts[0].errorHistory.size) Assert.assertTrue(getAlertsResponse.alerts[0].errorMessage!!.contains("Failed to run percolate search")) } @@ -951,7 +957,10 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { .get() Assert.assertTrue(getAlertsResponse != null) Assert.assertEquals(1, getAlertsResponse.alerts.size) - Assert.assertTrue(getAlertsResponse.alerts[0].errorMessage == "IndexClosedException[closed]") + Assert.assertTrue( + getAlertsResponse.alerts[0].errorMessage == + "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; " + ) Assert.assertNull(getAlertsResponse.alerts[0].endTime) // Open index to have monitor run successfully @@ -974,7 +983,10 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { .get() Assert.assertTrue(getAlertsResponse != null) Assert.assertEquals(1, getAlertsResponse.alerts.size) - Assert.assertTrue(getAlertsResponse.alerts[0].errorMessage == "IndexClosedException[closed]") + Assert.assertTrue( + getAlertsResponse.alerts[0].errorMessage == + "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; " + ) Assert.assertNotNull(getAlertsResponse.alerts[0].endTime) } @@ -1036,7 +1048,10 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { Assert.assertTrue(getAlertsResponse != null) Assert.assertEquals(1 + 10, getAlertsResponse.alerts.size) - val newErrorAlert = getAlertsResponse.alerts.firstOrNull { it.errorMessage == "IndexClosedException[closed]" } + val newErrorAlert = getAlertsResponse.alerts.firstOrNull { + it.errorMessage == + "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; " + } Assert.assertNotNull(newErrorAlert) Assert.assertNull(newErrorAlert!!.endTime) diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt b/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt index e4d63753b..209df41f0 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt @@ -815,3 +815,9 @@ fun randomAlertContext( sampleDocs = sampleDocs ) } + +@Suppress("UNCHECKED_CAST") +/** helper that returns a field in a json map whose values are all json objects */ +fun Map.objectMap(key: String): Map> { + return this[key] as Map> +} diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequestTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequestTests.kt new file mode 100644 index 000000000..05d59063c --- /dev/null +++ b/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequestTests.kt @@ -0,0 +1,90 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.alerting.ALWAYS_RUN +import org.opensearch.alerting.model.ActionExecutionTime +import org.opensearch.alerting.model.IndexExecutionContext +import org.opensearch.alerting.model.MonitorMetadata +import org.opensearch.alerting.randomDocumentLevelMonitor +import org.opensearch.alerting.randomDocumentLevelTrigger +import org.opensearch.alerting.workflow.WorkflowRunContext +import org.opensearch.common.io.stream.BytesStreamOutput +import org.opensearch.commons.alerting.model.DocLevelMonitorInput +import org.opensearch.commons.alerting.model.DocLevelQuery +import org.opensearch.commons.alerting.model.IntervalSchedule +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.Workflow +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.index.shard.ShardId +import org.opensearch.index.seqno.SequenceNumbers +import org.opensearch.test.OpenSearchTestCase +import java.time.Instant +import java.time.temporal.ChronoUnit +import java.util.UUID + +class DocLevelMonitorFanOutRequestTests : OpenSearchTestCase() { + + fun `test doc level monitor fan out request as stream`() { + val docQuery = DocLevelQuery(query = "test_field:\"us-west-2\"", fields = listOf(), name = "3") + val docLevelInput = DocLevelMonitorInput("description", listOf("test-index"), listOf(docQuery)) + + val trigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val monitor = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput), + triggers = listOf(trigger), + enabled = true, + schedule = IntervalSchedule(1, ChronoUnit.MINUTES) + ) + val monitorMetadata = MonitorMetadata( + "test", + SequenceNumbers.UNASSIGNED_SEQ_NO, + SequenceNumbers.UNASSIGNED_PRIMARY_TERM, + Monitor.NO_ID, + listOf(ActionExecutionTime("", Instant.now())), + mutableMapOf("index" to mutableMapOf("1" to "1")), + mutableMapOf("test-index" to ".opensearch-sap-test_windows-queries-000001") + ) + val indexExecutionContext = IndexExecutionContext( + listOf(docQuery), + mutableMapOf("index" to mutableMapOf("1" to "1")), + mutableMapOf("index" to mutableMapOf("1" to "1")), + "test-index", + "test-index", + listOf("test-index"), + listOf("test-index"), + listOf("test-field"), + listOf("1", "2") + ) + val workflowRunContext = WorkflowRunContext( + Workflow.NO_ID, + Workflow.NO_ID, + Monitor.NO_ID, + mutableMapOf("index" to listOf("1")), + true + ) + val docLevelMonitorFanOutRequest = DocLevelMonitorFanOutRequest( + monitor, + false, + monitorMetadata, + UUID.randomUUID().toString(), + indexExecutionContext, + listOf(ShardId("test-index", UUID.randomUUID().toString(), 0)), + listOf("test-index"), + workflowRunContext + ) + val out = BytesStreamOutput() + docLevelMonitorFanOutRequest.writeTo(out) + val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes) + val newDocLevelMonitorFanOutRequest = DocLevelMonitorFanOutRequest(sin) + assertEquals(docLevelMonitorFanOutRequest.monitor, newDocLevelMonitorFanOutRequest.monitor) + assertEquals(docLevelMonitorFanOutRequest.executionId, newDocLevelMonitorFanOutRequest.executionId) + assertEquals(docLevelMonitorFanOutRequest.monitorMetadata, newDocLevelMonitorFanOutRequest.monitorMetadata) + assertEquals(docLevelMonitorFanOutRequest.indexExecutionContext, newDocLevelMonitorFanOutRequest.indexExecutionContext) + assertEquals(docLevelMonitorFanOutRequest.shardIds, newDocLevelMonitorFanOutRequest.shardIds) + assertEquals(docLevelMonitorFanOutRequest.workflowRunContext, newDocLevelMonitorFanOutRequest.workflowRunContext) + } +} diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponseTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponseTests.kt new file mode 100644 index 000000000..c3b1d38a3 --- /dev/null +++ b/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponseTests.kt @@ -0,0 +1,56 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.alerting.model.InputRunResults +import org.opensearch.alerting.randomDocumentLevelTriggerRunResult +import org.opensearch.common.io.stream.BytesStreamOutput +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.test.OpenSearchTestCase + +class DocLevelMonitorFanOutResponseTests : OpenSearchTestCase() { + fun `test doc level monitor fan out response with errors as stream`() { + val docLevelMonitorFanOutResponse = DocLevelMonitorFanOutResponse( + "nodeid", + "eid", + "monitorId", + mutableMapOf("index" to mutableMapOf("1" to "1")), + InputRunResults(error = null), + mapOf("1" to randomDocumentLevelTriggerRunResult(), "2" to randomDocumentLevelTriggerRunResult()) + ) + val out = BytesStreamOutput() + docLevelMonitorFanOutResponse.writeTo(out) + val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes) + val newDocLevelMonitorFanOutResponse = DocLevelMonitorFanOutResponse(sin) + assertEquals(docLevelMonitorFanOutResponse.nodeId, newDocLevelMonitorFanOutResponse.nodeId) + assertEquals(docLevelMonitorFanOutResponse.executionId, newDocLevelMonitorFanOutResponse.executionId) + assertEquals(docLevelMonitorFanOutResponse.monitorId, newDocLevelMonitorFanOutResponse.monitorId) + assertEquals(docLevelMonitorFanOutResponse.lastRunContexts, newDocLevelMonitorFanOutResponse.lastRunContexts) + assertEquals(docLevelMonitorFanOutResponse.inputResults, newDocLevelMonitorFanOutResponse.inputResults) + assertEquals(docLevelMonitorFanOutResponse.triggerResults, newDocLevelMonitorFanOutResponse.triggerResults) + } + + fun `test doc level monitor fan out response as stream`() { + val workflow = DocLevelMonitorFanOutResponse( + "nodeid", + "eid", + "monitorId", + mapOf("index" to mapOf("1" to "1")) as MutableMap, + InputRunResults(), + mapOf("1" to randomDocumentLevelTriggerRunResult(), "2" to randomDocumentLevelTriggerRunResult()) + ) + val out = BytesStreamOutput() + workflow.writeTo(out) + val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes) + val newWorkflow = DocLevelMonitorFanOutResponse(sin) + assertEquals(workflow.nodeId, newWorkflow.nodeId) + assertEquals(workflow.executionId, newWorkflow.executionId) + assertEquals(workflow.monitorId, newWorkflow.monitorId) + assertEquals(workflow.lastRunContexts, newWorkflow.lastRunContexts) + assertEquals(workflow.inputResults, newWorkflow.inputResults) + assertEquals(workflow.triggerResults, newWorkflow.triggerResults) + } +} diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/action/GetFindingsRequestTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/action/GetFindingsRequestTests.kt index d1bd6f7e3..990da66fa 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/action/GetFindingsRequestTests.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/action/GetFindingsRequestTests.kt @@ -9,6 +9,7 @@ import org.opensearch.common.io.stream.BytesStreamOutput import org.opensearch.commons.alerting.action.GetFindingsRequest import org.opensearch.commons.alerting.model.Table import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.index.query.BoolQueryBuilder import org.opensearch.test.OpenSearchTestCase class GetFindingsRequestTests : OpenSearchTestCase() { @@ -17,7 +18,13 @@ class GetFindingsRequestTests : OpenSearchTestCase() { val table = Table("asc", "sortString", null, 1, 0, "") - val req = GetFindingsRequest("2121", table, "1", "finding_index_name") + val req = GetFindingsRequest( + "2121", + table, + "1", + "finding_index_name", + boolQueryBuilder = BoolQueryBuilder() + ) assertNotNull(req) val out = BytesStreamOutput() diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/model/WriteableTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/model/WriteableTests.kt index 6ef15f8d8..c1d3ac304 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/model/WriteableTests.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/model/WriteableTests.kt @@ -5,23 +5,24 @@ package org.opensearch.alerting.model +import org.junit.Assert import org.opensearch.alerting.model.destination.email.EmailAccount import org.opensearch.alerting.model.destination.email.EmailGroup -import org.opensearch.alerting.randomActionRunResult import org.opensearch.alerting.randomBucketLevelMonitorRunResult import org.opensearch.alerting.randomBucketLevelTriggerRunResult import org.opensearch.alerting.randomDocumentLevelMonitorRunResult -import org.opensearch.alerting.randomDocumentLevelTriggerRunResult import org.opensearch.alerting.randomEmailAccount import org.opensearch.alerting.randomEmailGroup import org.opensearch.alerting.randomInputRunResults import org.opensearch.alerting.randomQueryLevelMonitorRunResult import org.opensearch.alerting.randomQueryLevelTriggerRunResult +import org.opensearch.common.UUIDs import org.opensearch.common.io.stream.BytesStreamOutput import org.opensearch.commons.alerting.model.SearchInput import org.opensearch.core.common.io.stream.StreamInput import org.opensearch.search.builder.SearchSourceBuilder import org.opensearch.test.OpenSearchTestCase +import java.time.Instant class WriteableTests : OpenSearchTestCase() { @@ -126,4 +127,35 @@ class WriteableTests : OpenSearchTestCase() { val newEmailGroup = EmailGroup.readFrom(sin) assertEquals("Round tripping EmailGroup doesn't work", emailGroup, newEmailGroup) } + + fun `test DocumentLevelTriggerRunResult as stream`() { + val workflow = randomDocumentLevelTriggerRunResult() + val out = BytesStreamOutput() + workflow.writeTo(out) + val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes) + val newWorkflow = DocumentLevelTriggerRunResult(sin) + Assert.assertEquals("Round tripping dltrr failed", newWorkflow, workflow) + } + + fun randomDocumentLevelTriggerRunResult(): DocumentLevelTriggerRunResult { + val map = mutableMapOf() + map.plus(Pair("key1", randomActionRunResult())) + map.plus(Pair("key2", randomActionRunResult())) + return DocumentLevelTriggerRunResult( + "trigger-name", + mutableListOf(UUIDs.randomBase64UUID().toString()), + null, + mutableMapOf(Pair("alertId", map)) + ) + } + + fun randomActionRunResult(): ActionRunResult { + val map = mutableMapOf() + map.plus(Pair("key1", "val1")) + map.plus(Pair("key2", "val2")) + return ActionRunResult( + "1234", "test-action", map, + false, Instant.now(), null + ) + } } diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/resthandler/SecureWorkflowRestApiIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/resthandler/SecureWorkflowRestApiIT.kt index 941922040..3f1b45fa8 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/resthandler/SecureWorkflowRestApiIT.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/resthandler/SecureWorkflowRestApiIT.kt @@ -9,10 +9,10 @@ import org.apache.http.HttpHeaders import org.apache.http.entity.ContentType import org.apache.http.message.BasicHeader import org.apache.http.nio.entity.NStringEntity +import org.apache.lucene.tests.util.LuceneTestCase.AwaitsFix import org.junit.After import org.junit.Before import org.junit.BeforeClass -import org.junit.Ignore import org.opensearch.alerting.ALERTING_BASE_URI import org.opensearch.alerting.ALERTING_DELETE_WORKFLOW_ACCESS import org.opensearch.alerting.ALERTING_EXECUTE_WORKFLOW_ACCESS @@ -63,7 +63,7 @@ import org.opensearch.test.junit.annotations.TestLogging import java.time.Instant // TODO investigate flaky nature of tests. not reproducible in local but fails in jenkins CI -@Ignore +@AwaitsFix(bugUrl = "") @TestLogging("level:DEBUG", reason = "Debug for tests.") @Suppress("UNCHECKED_CAST") class SecureWorkflowRestApiIT : AlertingRestTestCase() { diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/triggeraction/TriggerExpressionResolverTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/triggeraction/TriggerExpressionResolverTests.kt index 1dd19d9d1..67b8f7e9f 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/triggeraction/TriggerExpressionResolverTests.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/triggeraction/TriggerExpressionResolverTests.kt @@ -27,7 +27,7 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val equation = TriggerExpressionParser(eqString).parse() val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("6", "3", "7") - queryToDocIds[DocLevelQuery("id1456", "", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3") + queryToDocIds[DocLevelQuery("id1456", "sigma-456", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3") Assert.assertEquals("query[name=sigma-123] query[id=id1456] && ", equation.toString()) Assert.assertEquals(mutableSetOf("3"), equation.evaluate(queryToDocIds)) } @@ -37,7 +37,7 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val equation = TriggerExpressionParser(eqString).parse() val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("6", "8", "7") - queryToDocIds[DocLevelQuery("", "", listOf(), "", mutableListOf("tag=sev2"))] = mutableSetOf("1", "2", "3") + queryToDocIds[DocLevelQuery("", "sigma-456", listOf(), "", mutableListOf("tag=sev2"))] = mutableSetOf("1", "2", "3") Assert.assertEquals("query[name=sigma-123] query[tag=sev2] && ", equation.toString()) Assert.assertEquals(emptySet(), equation.evaluate(queryToDocIds)) } @@ -57,7 +57,7 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val equation = TriggerExpressionParser(eqString).parse() val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("6", "3", "7") - queryToDocIds[DocLevelQuery("id1456", "", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3") + queryToDocIds[DocLevelQuery("id1456", "sigma-456", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3") Assert.assertEquals("query[name=sigma-123] query[id=id1456] || ", equation.toString()) Assert.assertEquals(mutableSetOf("6", "3", "7", "1", "2", "3"), equation.evaluate(queryToDocIds)) } @@ -67,7 +67,7 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val equation = TriggerExpressionParser(eqString).parse() val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("6", "8", "7") - queryToDocIds[DocLevelQuery("", "", listOf(), "", mutableListOf("tag=sev2"))] = emptySet() + queryToDocIds[DocLevelQuery("", "sigma-456", listOf(), "", mutableListOf("tag=sev2"))] = emptySet() Assert.assertEquals("query[name=sigma-123] query[tag=sev2] || ", equation.toString()) Assert.assertEquals(mutableSetOf("6", "8", "7"), equation.evaluate(queryToDocIds)) } @@ -88,7 +88,7 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3", "11") queryToDocIds[DocLevelQuery("", "sigma-456", listOf(), "", emptyList())] = mutableSetOf("3", "4", "5") - queryToDocIds[DocLevelQuery("id_new", "", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") + queryToDocIds[DocLevelQuery("id_new", "sigma-789", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") Assert.assertEquals("query[name=sigma-123] query[name=sigma-456] ! && ", equation.toString()) Assert.assertEquals(mutableSetOf("1", "2", "11"), equation.evaluate(queryToDocIds)) } @@ -98,8 +98,8 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val equation = TriggerExpressionParser(eqString).parse() val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("6", "3", "7") - queryToDocIds[DocLevelQuery("id1456", "", listOf(), "", emptyList())] = mutableSetOf("11", "12", "15") - queryToDocIds[DocLevelQuery("id_new", "", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") + queryToDocIds[DocLevelQuery("id1456", "sigma-456", listOf(), "", emptyList())] = mutableSetOf("11", "12", "15") + queryToDocIds[DocLevelQuery("id_new", "sigma-789", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") Assert.assertEquals("query[name=sigma-123] query[id=id1456] ! || ", equation.toString()) Assert.assertEquals(mutableSetOf("6", "3", "7", "13"), equation.evaluate(queryToDocIds)) } @@ -110,9 +110,9 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3") - queryToDocIds[DocLevelQuery("id_random1", "", listOf(), "", mutableListOf("sev1"))] = mutableSetOf("2", "3", "4") + queryToDocIds[DocLevelQuery("id_random1", "sigma-456", listOf(), "", mutableListOf("sev1"))] = mutableSetOf("2", "3", "4") queryToDocIds[DocLevelQuery("", "sigma-789", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") - queryToDocIds[DocLevelQuery("id-2aw34", "", listOf(), "", emptyList())] = mutableSetOf("13", "14", "15") + queryToDocIds[DocLevelQuery("id-2aw34", "sigma-101112", listOf(), "", emptyList())] = mutableSetOf("13", "14", "15") Assert.assertEquals( "query[name=sigma-123] query[tag=sev1] && query[name=sigma-789] ! query[id=id-2aw34] || ! || ", diff --git a/alerting/src/test/resources/sample.pem b/alerting/src/test/resources/sample.pem index a1fc20a77..b690a603d 100644 --- a/alerting/src/test/resources/sample.pem +++ b/alerting/src/test/resources/sample.pem @@ -1,9 +1,9 @@ -----BEGIN CERTIFICATE----- -MIIEPDCCAySgAwIBAgIUZjrlDPP8azRDPZchA/XEsx0X2iIwDQYJKoZIhvcNAQEL +MIIEPDCCAySgAwIBAgIUaYSlET3nzsotWTrWueVPPh10yLYwDQYJKoZIhvcNAQEL BQAwgY8xEzARBgoJkiaJk/IsZAEZFgNjb20xFzAVBgoJkiaJk/IsZAEZFgdleGFt cGxlMRkwFwYDVQQKDBBFeGFtcGxlIENvbSBJbmMuMSEwHwYDVQQLDBhFeGFtcGxl IENvbSBJbmMuIFJvb3QgQ0ExITAfBgNVBAMMGEV4YW1wbGUgQ29tIEluYy4gUm9v -dCBDQTAeFw0yMzA4MjkwNDIzMTJaFw0zMzA4MjYwNDIzMTJaMFcxCzAJBgNVBAYT +dCBDQTAeFw0yNDAyMjAxNzAzMjVaFw0zNDAyMTcxNzAzMjVaMFcxCzAJBgNVBAYT AmRlMQ0wCwYDVQQHDAR0ZXN0MQ0wCwYDVQQKDARub2RlMQ0wCwYDVQQLDARub2Rl MRswGQYDVQQDDBJub2RlLTAuZXhhbXBsZS5jb20wggEiMA0GCSqGSIb3DQEBAQUA A4IBDwAwggEKAoIBAQCm93kXteDQHMAvbUPNPW5pyRHKDD42XGWSgq0k1D29C/Ud @@ -16,10 +16,10 @@ BEAwPogFKgMEBQWCEm5vZGUtMC5leGFtcGxlLmNvbYIJbG9jYWxob3N0hxAAAAAA AAAAAAAAAAAAAAABhwR/AAABMAsGA1UdDwQEAwIF4DAdBgNVHSUEFjAUBggrBgEF BQcDAQYIKwYBBQUHAwIwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQU0/qDQaY10jIo wCjLUpz/HfQXyt8wHwYDVR0jBBgwFoAUF4ffoFrrZhKn1dD4uhJFPLcrAJwwDQYJ -KoZIhvcNAQELBQADggEBAD2hkndVih6TWxoe/oOW0i2Bq7ScNO/n7/yHWL04HJmR -MaHv/Xjc8zLFLgHuHaRvC02ikWIJyQf5xJt0Oqu2GVbqXH9PBGKuEP2kCsRRyU27 -zTclAzfQhqmKBTYQ/3lJ3GhRQvXIdYTe+t4aq78TCawp1nSN+vdH/1geG6QjMn5N -1FU8tovDd4x8Ib/0dv8RJx+n9gytI8n/giIaDCEbfLLpe4EkV5e5UNpOnRgJjjuy -vtZutc81TQnzBtkS9XuulovDE0qI+jQrKkKu8xgGLhgH0zxnPkKtUg2I3Aq6zl1L -zYkEOUF8Y25J6WeY88Yfnc0iigI+Pnz5NK8R9GL7TYo= +KoZIhvcNAQELBQADggEBAGbij5WyF0dKhQodQfTiFDb73ygU6IyeJkFSnxF67gDz +pQJZKFvXuVBa3cGP5e7Qp3TK50N+blXGH0xXeIV9lXeYUk4hVfBlp9LclZGX8tGi +7Xa2enMvIt5q/Yg3Hh755ZxnDYxCoGkNOXUmnMusKstE0YzvZ5Gv6fcRKFBUgZLh +hUBqIEAYly1EqH/y45APiRt3Nor1yF6zEI4TnL0yNrHw6LyQkUNCHIGMJLfnJQ9L +camMGIXOx60kXNMTigF9oXXwixWAnDM9y3QT8QXA7hej/4zkbO+vIeV/7lGUdkyg +PAi92EvyxmsliEMyMR0VINl8emyobvfwa7oMeWMR+hg= -----END CERTIFICATE----- diff --git a/alerting/src/test/resources/test-kirk.jks b/alerting/src/test/resources/test-kirk.jks index 6dbc51e714784fa58a4209c75deab8b9ed1698ff..6c8c5ef77e20980f8c78295b159256b805da6a28 100644 GIT binary patch literal 3766 zcmd^=c{r47AIImJ%`(PV###wuU&o%k$xbMgr4m`Pk2Tv-j4?=zEwY?!X|aVw)I`=A zPAY52Rt6yODkPjhAQ%WsfbL*f;mp!-018Nf*#Q6sf)b!}Nv;s_8gzOC@mTmi+D9F}jyYkhL=#Xk3eYM2csmxKA&W!xAdE{tZ2mEGS z;L%QU`DHcrbdbw$3GsKUvmfQu0Z^?sH7B)!W)eLbG*fXB^G$&6CbCnj4~ z*J>Rkut6vL1EvT!JqAq#X=O~#!JHQ#QVSPuOGlnLrXXB~{{FsGRq?o?I;>^GFEhMB zw;z!v1sXap8nq3zz&+prKs-DRPm*XsS4BaP6Z{8tM~n@m|rxMA=p6*i(w=7 z*2&*Yg-uWU$5|W>>g5h)Fn{3B={`skAJ5_wXB5pDwyj{vG1_{{Y-`wB_i^B!5PA|= zrx=_>rprb&75BQ=J)SKPAJI;?(D#46)o+a?SsR^-&qJjXY2ER8S*1ZvU`t7~M6?NKULuzlAZ8C#X9>8j2;WDY z(TY-^!`&0%67`u|U_-Y(knWVcSlh-kwZQ6KG@S?L`W!iVl>Gyd(LnpMc@C!QeY{(E z)uAwF_CcqH#00}jer2dQk3}R|p^87XCxR8`n4c@g9rASTt9$8}SuGW!!+QQ&w&G!P zvv5Mft<&pzv^&XuuQAj&ieoa*3nI-hx}0`4kym=(cd>?v6yM3v43y@5@;yPeJ_N{@ z622W$@5Z4VqliMF3GAf_RcB;$HX^%cwTCgxg^4)5I0?*&oW|giBB@nUNBO+IX=iON zo~;L}HOwhyeqH4GHvAQ5i=|0c+_5*661aDyT_tr=I#+Zog%!9nRiuBb8m&SS4qp2fv7HJMG zwJFuqV*Hoq3`|Mayml;So|9W4Um6Lu8(k+(Hc2}p@&>?!7!7H~9*O%@BrKNAOa-~e z$e6#G)fJ+wNz5x9zU;#>&V}d z?!F1W_eNN;&LI9$!kWa0Zqa)0CVM4D=x(r>aXgW=XQ)PTRsJJ&MC?WjjoMwLRh`-I z8yD|^&(r#NU|pRpRF%wn&t%X`)8HQe%uxEKnXxIu9yui1s$eH0*YZ^Wvt25yOg6{5 zPefKstjqam-PRDz=&-BVb^xZe>{C{$cza!_sV&3M*l0ocMJVr!l~TlJi4JChDn9Nn zc&la1caY}0P&Ho=r;)l;mKBf$V<6A*R6XC}s98g%I7ZIAFI=e6SqQ4;oevw)nw0%^ zKq9#$;{3R0zJv}#mr7@}e+5-(`{C?^vEE#xb7uBY=X#_1v+@~@l?W@Zaq+Yo9bpu& zR<0us_T`(Q6qp1xYb)Rq;tJ|aTZ&y5xqx<_j-|>1$SEi@3!A|| z9YH<3ub_#ai=2WG_V9iQ!NU8mB|$4ZK3Gr>_s15;6W-XV-*##3TjwoMP&yb zq!L{!sQoUn<_ZWb)BbzloM2Zs1tb=+FBn*$!EQmp3Ml#oe;g0);^XP&_osni`NR1A z0SL>FG{F)8;h%d#4-g0eK+%&0UD-=ghUr~yDQ?!lNE5tKiJ_rjY{@`Q1vjbVAFU;|?Qs;w|1hFx_ z`*jR7rVAU>9*yRSpD1)#aOb!)@ak(5hk;guG$_9)=K8Ie^uOP<63|FjrX2UEcJw07 zD5c?bxHD${?)1+CMgPg@0|kH>4NzJZO*;#rl-xA_8*SHCS}ygKZP7*uHbRtmaTE%n zp7Vt7QIt|IIN?)fyS#8IxKHO$?TeY{DpQl5^kyAd$HH^Aa)SJC+I0!ULR znF7*z6R6~{CCW6M^qKuU!N`I`>YB3i6toA7f7#3%T&$5&wm0nY{&d9(g)LB$%g9dX zf>HfjVn9;)rG-^=)tiGDd<5M4wDHPl@yEGU_whSh78l$%S*WCqjvj^Xt?_VKp0T{pQGU!F;?_^4EMT$__$E zH0hMGQlo@W2p^_tPZsnirl@pGb<#0a^*g5ihYtSzKKx%Wg;i4h8B_c6Z+PPWM!I%g zOr-dLp|0@RV@@&InVrwRJfPT~ZY840gT$Jl4)HP^qcTUWE~1&}C2wS3Sv9pJWiRva zyK}a9ilnrYe7SB$bu~GF&GM`D1h@ukNsJY|Yt>|?q(4gzgSUuGwSIfsmlD)%J2V0@ zTU&-58&x%P)-#Oev2~&}bv^wwRbD$?Enu(jJiuwM3shGOZ{$juY+RGk#m^`!p7+vO zAjWFn1{dq`T?N^TggHmN3~VGf^5?a_)R-cj5yfk-?V<|S)%uKn{YGL)7(~eAhWA56 zj7ZS7amp#qQM;t>%6F)v{1S-Gq>88IPiL?2X9=q_r$vhc4{Pd3$WssBMbZaV2W zu&8||{U99-3!x+JudoA1KSAx^0qg$*YLr)FKtJ($lC@k)W?khPY!~B&3F~Xnxs_WH)b*(MC{~@>r={U4@A6+2p8il>0lojdT`r8~C>rA6;jw^lZK9gk<_y!v za(Rbclc{1;TFBtT`lr|YO0}|UXzh>FLsx6RQUq8=?V4{NR#=oxL2}kHb-ZAfuNRt32Rtcg+B4PQKLo)5nT`xBt(f8 zz4zYx{`1az=l47B(|aH0%$a-V&c}OZ28N+d1QLK?7-~f#Qh{)-@KbUEVuBnDwFn`G zTJSH-2g86X{uc$#Cd7a<{=zALBY_C=KPs|Y1i%~&Sotp~4}12H0!$9GfJy&blEDNC z=>%hA9@l)1y-8vD6#cH^U}=KBI0FdeqXH7J!^nt8{(B;j6byi|5|P@4YY{kr2nhrT zsl1TD93_M516EPM#9d4EG(rsFKtBW4^r*(5KwKbTLB){+^0E(}Q+A7HoW0lrA)@i+ zydGtY^95cAh7C?*2qIcESObb&7%#|($|(-eXIiQ#0>bYpj@=?*4?U=5@-ISTdSa4x zOtEjIWb0hr)D^1HVpX7-CjwnsDG8#WM@AVZvyufeW?}`^GtGW7WcGsVl)G*$?lP3S z^GYelg04B!ZBp4GnwCzq@uOLfB4xY#hE;StB61*Yd8?%(Nl9NW{s3+HODy#ik72s%Hj($a8 zhF0>hs}=106=eHlR<&9zT@LuHAUIZWLFWrKQ#$R3^=pv*&-7e6{O_Ji`|s`^^4v@-Hr>`?(V#!ktZ-$-0?Jt1G-G? zE9HvN@-0iPpKSDRsLacPB>#JY4d$KM!zs7xPBvUu4HQ}!Bz$qc)A`=Ver4EBC?!g7b zuW7GvE*puJA=;!bv2_S?8ZQx_n`M?F&kkb{-h zKwO=OA_@auvAUmAsQW~NjYK|}m{>`{*n^45MJ^ph*%K9}8GnxA%-;D^^-}ih8oWP* zXJ#vzJY3e4?&oSey+_=qv19lq zeLI>%Gjx=y!qVzf%Y&c7dgkjEw?^rl8^KxGs^%{Fd_(b51&l(wYCO&Rc~ZUl5^~y> zc}BJ!4+n2KaS|<{vd#M44my1W|M0Y-gfk9<&l%IBje@31-Sr1Mt!fvT(Pe+Gt$Bz? z_up@HJf$b!)YfI|4{%l^JDxgWvp75|nMzg7E)(qZ%=alvt zXMfZg7Z=_eanGP?tBXFKyvFRu$?uMAzg|k-(32orZccxnHGr$(gM%4Hgc&3blJCi; z6j@^Y3XVg*doBz7pms~Jn7 z9>1&oI7bPBOnn7vyV1x>YahPMDy_bySw!71ij);ebzBEUSZK&o1y43I-AuJKXJ~C3 z{ScF0neCZB8?5r>Px#3V%} zq$OY&i2FZH#6&q5i2Yy421o$-o6P@Z2>vgd4p$sB)+@I7CAQvk>m=OVG#EC`^#8Hx zXo}&oS5+Eg(sw4>QN4_Cy_0U!W9o!pxS@}|4s+L{ow)59*P>fYuDV~JqCwTL5s{)3(v zzbM`$E?)E;`zu*Kjpah> zgQl1ucOJOd1|%MDBk_Lsu64*-#r>9orWT19xT!DnCoNv_AnWczl?5a3@Sd4mtPrx@ z;QPqXK#%ve%3=_Sa$)(zJ)mvCYW0$Uim6bQ!S}#H@uPFY+qvmT_x`cr%&q*~6sufG zKKVZ8ebd?WhVYT)or=?jzV*~PLH&t?CH^KO=IX%=oHNr75%vVz=nN9ipHOrX*7{h! zNkaI3@a@JfTINcbD<@;DNwqa&=S5v4pM=tBEMN8HU3}euq?(dEFWfNC>H+2C+1dBA zFs|s&27315cK^vG`LRKX~{Ugw!|2K~TP_VAqXtzNY6)j={rQ zv73v$!psb1ph9o6`kKlGjC8GEdFX9+@{I}q{33}%?v>$a-cw6HGOOLVnv3ITN_D~k zo^QL%)6K#_{j)b&>8Qy@Eweq=Ne8rKsjJTe)mfDw?scqlc&US2dxU0@o5$(Zu(GB4 zujr5^yZdwlP>E{wrkq=NiW~PQZm5`fJz5m&9I}B^zPVNSSa9vWcXu^m%+bU|aOg5q zK%|a72J^vxGy)&3GlNod=Wt|FBG=mgP)o%{(2PCL$9s$dMvIcv^FdM?hbNYQrX%I| z{binoW_?J27M3L2H_Y4n0!3PGL#b*UxRbpd3l$RLC#I})-32((m#4}vP%kHB3Q7PGLpvuro4~7i2u6z$3ar+YSP2?_%+^%f* zR}5Rl@nUnDVdT&uE_ZP%NU-(Zn*^k2*4S;xubW_f3f-cK+=>uy-sK;&F{mRdpgwIgSHfJSw=22paH-mu>R=3Kf9cR*A_Sjg7q#MM< zqobyHu#q_oM3;REOf&nTGa=n6MK4QZ{pey;iGwX&bnAUCVq`=c0{gykLm{VZo%ulF z*n_LEk%}KbmVW1)L+Ab3sSZPR+Fe*5p$^HC|Oyb{_is> zsuD42;l;BT-a#X6fP(~C+`TP&(``5KD7dp9)GD&EVfNN4Bf@5N63j4c_IOZZ`^gF1 zphj9>;b1JVOWrk`HhO{mmk*Lp>wXpL*r|VQth!^2ajO2-Q$=;E0ZcMzj9V;D}3k7ej?g$MEOSvfr*p<&b z6B?7p3F^a78y9pEd$#q2Pm1b zU#?c^Op~TXSZ`3z2a{A=UzcS`zB%Z|XG2xth@1`h=wY$wyp|u2)s&QN#af+k>`vF! z&{oB;K{Wblwtcc`JH%E!TwV2q%vd}p>iZ9d@C(kwR>Dm)p? zV-i0tv8PP66)jD1#I*Qm*`@U`^o)}|58+bGD1y(EEM_dJh-O9xP^xdF-_Z#qZ&m{c zbC6W;iNU!24Cvnj14>>_V8a{IB$GXu&z39rEKNX_07*3xp*W3rJo!}pp2M0Hwe$#* zi#HgV_>>SSD;YT=uK8*Lu|$a+IIXPF$${!eaPU%X#jh@y96VcWEFGqB#<_hE8QPmQ zO_C$p_nXzGgQtqVrC1t-5`*juoj0Q%VLnw`@Yt&eCg!x)84Pq&N%`@t**O@LYz3OR(@+})Hu&$>gJ;6oxdO{ z&KR3!hDx52>YBb*JE@4B`8}j*yOg=37>&zbSN}#T@GA6n9+dFcA*9q_l2eI%Xh*7~ ziU87?k{%5!@e5oasj8xTY|ysPyOMR3W;w?vvG}prD%~$8wf$j!6&K4LI%aD1$6B&8 zG|Bq_{em<75I~pVeMNJ6Dv9e{<=x@Es?2r|L;d(lJhNv+5~$`ps7`1lAq>B{Ot5Ga z6qD6CeNHKADuYBeC(!$C>E5yJ7O5IFfdN*2lPV*LTj(fX$`T*h6!l7_BFQ%HhbJFp zKUVk@Dl`5ZH)LoQ^{7N6?HyY_;Jo?*Uu#dn_XW`49o!xdK!+JJN_3KD7k@2J((0h0 z?0!++a*3VkR_Y8-s+o<1M(>PCz=|sJMqa z0+r0sNH_$gvD_@AC}TCb8}m~2v}_leWOtWdheZwxJl0i{OGIRcO0iVJ-B>5CgP^O-M7OYVJ*8(0|euX~UGp`sq@@gaEw*bHD4*Dj8_ zPO4*=dce-k-f;9Xl`P>A2U6SzIPhFWQT>2(PjqTMlBf}zL3<&dS*!E0mM}&jbXhc- zAb9}5!V(`=H1zl4fM|8TdAE{XwAuTJ>dTw3o}wzSb&xhxCijhe4Q#{|l(FXGy+A)j zH>IZrWy4|#?wJ-1?zBm;cKLHK*H5ngXeiJE?k?6Lz1i+02rcMG7kNDQlDJ_??0D#; z(Bju>vbV@>IGl97vC?TD(|fa!E?NjDA;*m&#_ZiX>Vgi+wr`atYOngkRp_w%?M~sv zUVImV4>dX4Ih+MO4LU`Ui=K%20a~JOwq1$6)KUw@81y#uUGKMV4>O0ioDGDvtZ{Jl zmay)x!zLD>Hl1jqnzX9b_da}w9xr9S`kQwUZPAei4I5Ao#$N}f9I10=!}MXIF!F!C z6+i+ofRKI2Rvlk8erCmgYu2%A6S_nSX7!cGJQ6pQ{xw*Iw(KXQGft90Ft(YQ<7nw! ROz*Khv5A{`^It3We*oUlR=)rM