From a1f8128516390ddc933dc7c53dbb65aaeedbfe1f Mon Sep 17 00:00:00 2001 From: Davis McPhee Date: Wed, 20 Nov 2024 14:52:47 -0400 Subject: [PATCH] [Data Views] Mitigate issue where `has_es_data` check can cause Kibana to hang (#200476) ## Summary This PR mitigates an issue where the `has_es_data` check can hang when some remote clusters are unresponsive, leaving users stuck in a loading state in some apps (e.g. Discover and Dashboard) until the request times out. There are two main changes that help mitigate this issue: - The `resolve/cluster` request in the `has_es_data` endpoint has been split into two requests -- one for local data first, then another for remote data second. In cases where remote clusters are unresponsive but there is data available in the local cluster, the remote check is never performed and the check completes quickly. This likely resolves the majority of cases and is also likely faster in general than checking both local and remote clusters in a single request. - In cases where there is no local data and the remote `resolve/cluster` request hangs, a new `data_views.hasEsDataTimeout` config has been added to `kibana.yml` (defaults to 5 seconds) to abort the request after a short delay. This scenario is handled in the front end by displaying an error toast to the user informing them of the issue, and assuming there is data available to avoid blocking them. When this occurs, a warning is also logged to the Kibana server logs. ![CleanShot 2024-11-18 at 23 47 34@2x](https://github.com/user-attachments/assets/6ea14869-b6b6-4d89-a90c-8150d6e6b043) Fixes #200280. ### Notes - Modifying the existing version of the `has_es_data` endpoint in this way should be backward compatible since the behaviour should remain unchanged from before when the client and server versions don't match (please validate if this seems accurate during review). - For a long term fix, the ES team is investigating the issue with `resolve/cluster` and will aim to have it behave like `resolve/index`, which fails quickly when remote clusters are unresponsive. They may also implement other mitigations like a configurable timeout in ES: https://github.com/elastic/elasticsearch/issues/114020. The purpose of this PR is to provide an immediate solution in Kibana that mitigates the issue as much as possible. - If ES ends up providing another performant method for checking if indices exist instead of `resolve/cluster`, Kibana should migrate to that. More details in https://github.com/elastic/elasticsearch/issues/112307. ### Testing notes To reproduce the issue locally, follow these steps: - Follow [these instructions](https://gist.github.com/lukasolson/d0861aa3e6ee476ac8dd7189ed476756) to set up a local CCS environment. - Stop the remote cluster process. - Use Netcat on the remote cluster port to listen to requests but not respond (e.g. on macOS: `nc -l 9600`), simulating an unresponsive cluster. See https://github.com/elastic/elasticsearch/issues/32678 for more context. - Navigate to Discover and observe that the `has_es_data` request hangs. When testing in this PR branch, the request will only wait for 5 seconds before assuming data exists and displaying a toast. ### Checklist - [x] Any text added follows [EUI's writing guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses sentence case text and includes [i18n support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md) - [ ] [Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html) was added for features that require explanation or tutorials - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios - [ ] If a plugin configuration key changed, check if it needs to be allowlisted in the cloud and added to the [docker list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker) - [x] This was checked for breaking HTTP API changes, and any breaking changes have been approved by the breaking-change committee. The `release_note:breaking` label should be applied in these situations. - [ ] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed - [x] The PR description includes the appropriate Release Notes section, and the correct `release_node:*` label is applied per the [guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process) --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com> (cherry picked from commit 96fd4b682b77f6c1d6d1c6ab0742462d9e9d2589) --- src/plugins/data_views/common/constants.ts | 9 + src/plugins/data_views/common/index.ts | 1 + src/plugins/data_views/common/types.ts | 1 + .../public/services/has_data.test.ts | 73 ++++ .../data_views/public/services/has_data.ts | 56 ++- src/plugins/data_views/server/index.ts | 2 +- src/plugins/data_views/server/plugin.ts | 2 + .../internal/has_es_data.test.ts | 319 ++++++++++++++++++ .../rest_api_routes/internal/has_es_data.ts | 133 +++++++- src/plugins/data_views/server/routes.ts | 12 +- src/plugins/data_views/tsconfig.json | 3 + 11 files changed, 582 insertions(+), 29 deletions(-) create mode 100644 src/plugins/data_views/server/rest_api_routes/internal/has_es_data.test.ts diff --git a/src/plugins/data_views/common/constants.ts b/src/plugins/data_views/common/constants.ts index b1e68fd44745c..4b1cf465efcc9 100644 --- a/src/plugins/data_views/common/constants.ts +++ b/src/plugins/data_views/common/constants.ts @@ -79,3 +79,12 @@ export const EXISTING_INDICES_PATH = '/internal/data_views/_existing_indices'; export const DATA_VIEWS_FIELDS_EXCLUDED_TIERS = 'data_views:fields_excluded_data_tiers'; export const DEFAULT_DATA_VIEW_ID = 'defaultIndex'; + +/** + * Valid `failureReason` attribute values for `has_es_data` API error responses + */ +export enum HasEsDataFailureReason { + localDataTimeout = 'local_data_timeout', + remoteDataTimeout = 'remote_data_timeout', + unknown = 'unknown', +} diff --git a/src/plugins/data_views/common/index.ts b/src/plugins/data_views/common/index.ts index 2b5ca664d56fc..d359489681a2e 100644 --- a/src/plugins/data_views/common/index.ts +++ b/src/plugins/data_views/common/index.ts @@ -13,6 +13,7 @@ export { META_FIELDS, DATA_VIEW_SAVED_OBJECT_TYPE, MAX_DATA_VIEW_FIELD_DESCRIPTION_LENGTH, + HasEsDataFailureReason, } from './constants'; export { LATEST_VERSION } from './content_management/v1/constants'; diff --git a/src/plugins/data_views/common/types.ts b/src/plugins/data_views/common/types.ts index 45f747691fc4b..8c229a01d0477 100644 --- a/src/plugins/data_views/common/types.ts +++ b/src/plugins/data_views/common/types.ts @@ -571,4 +571,5 @@ export interface ClientConfigType { scriptedFieldsEnabled?: boolean; dataTiersExcludedForFields?: string; fieldListCachingEnabled?: boolean; + hasEsDataTimeout: number; } diff --git a/src/plugins/data_views/public/services/has_data.test.ts b/src/plugins/data_views/public/services/has_data.test.ts index fc032ee44bc41..1171cd677b64f 100644 --- a/src/plugins/data_views/public/services/has_data.test.ts +++ b/src/plugins/data_views/public/services/has_data.test.ts @@ -10,6 +10,7 @@ import { coreMock } from '@kbn/core/public/mocks'; import { HasData } from './has_data'; +import { HttpFetchError } from '@kbn/core-http-browser-internal/src/http_fetch_error'; describe('when calling hasData service', () => { describe('hasDataView', () => { @@ -170,6 +171,78 @@ describe('when calling hasData service', () => { expect(await response).toBe(false); }); + + it('should return true and show an error toast when checking for remote cluster data times out', async () => { + const coreStart = coreMock.createStart(); + const http = coreStart.http; + + // Mock getIndices + const spy = jest.spyOn(http, 'get').mockImplementation(() => + Promise.reject( + new HttpFetchError( + 'Timeout while checking for Elasticsearch data', + 'TimeoutError', + new Request(''), + undefined, + { + statusCode: 504, + message: 'Timeout while checking for Elasticsearch data', + attributes: { + failureReason: 'remote_data_timeout', + }, + } + ) + ) + ); + const hasData = new HasData(); + const hasDataService = hasData.start(coreStart, true); + const response = hasDataService.hasESData(); + + expect(spy).toHaveBeenCalledTimes(1); + expect(await response).toBe(true); + expect(coreStart.notifications.toasts.addDanger).toHaveBeenCalledTimes(1); + expect(coreStart.notifications.toasts.addDanger).toHaveBeenCalledWith({ + title: 'Remote cluster timeout', + text: 'Checking for data on remote clusters timed out. One or more remote clusters may be unavailable.', + }); + }); + + it('should return true and not show an error toast when checking for remote cluster data times out, but onRemoteDataTimeout is overridden', async () => { + const coreStart = coreMock.createStart(); + const http = coreStart.http; + + // Mock getIndices + const responseBody = { + statusCode: 504, + message: 'Timeout while checking for Elasticsearch data', + attributes: { + failureReason: 'remote_data_timeout', + }, + }; + const spy = jest + .spyOn(http, 'get') + .mockImplementation(() => + Promise.reject( + new HttpFetchError( + 'Timeout while checking for Elasticsearch data', + 'TimeoutError', + new Request(''), + undefined, + responseBody + ) + ) + ); + const hasData = new HasData(); + const hasDataService = hasData.start(coreStart, true); + const onRemoteDataTimeout = jest.fn(); + const response = hasDataService.hasESData({ onRemoteDataTimeout }); + + expect(spy).toHaveBeenCalledTimes(1); + expect(await response).toBe(true); + expect(coreStart.notifications.toasts.addDanger).not.toHaveBeenCalled(); + expect(onRemoteDataTimeout).toHaveBeenCalledTimes(1); + expect(onRemoteDataTimeout).toHaveBeenCalledWith(responseBody); + }); }); describe('resolve/cluster not available', () => { diff --git a/src/plugins/data_views/public/services/has_data.ts b/src/plugins/data_views/public/services/has_data.ts index aad546c446cf3..bcf80ca337460 100644 --- a/src/plugins/data_views/public/services/has_data.ts +++ b/src/plugins/data_views/public/services/has_data.ts @@ -8,10 +8,22 @@ */ import { CoreStart, HttpStart } from '@kbn/core/public'; -import { DEFAULT_ASSETS_TO_IGNORE } from '../../common'; +import { IHttpFetchError, ResponseErrorBody, isHttpFetchError } from '@kbn/core-http-browser'; +import { isObject } from 'lodash'; +import { i18n } from '@kbn/i18n'; +import { DEFAULT_ASSETS_TO_IGNORE, HasEsDataFailureReason } from '../../common'; import { HasDataViewsResponse, IndicesViaSearchResponse } from '..'; import { IndicesResponse, IndicesResponseModified } from '../types'; +export interface HasEsDataParams { + /** + * Callback to handle the case where checking for remote data times out. + * If not provided, the default behavior is to show a toast notification. + * @param body The error response body + */ + onRemoteDataTimeout?: (body: ResponseErrorBody) => void; +} + export class HasData { private removeAliases = (source: IndicesResponseModified): boolean => !source.item.indices; @@ -38,28 +50,55 @@ export class HasData { return hasLocalESData; }; - const hasESDataViaResolveCluster = async () => { + const hasESDataViaResolveCluster = async ( + onRemoteDataTimeout: (body: ResponseErrorBody) => void + ) => { try { const { hasEsData } = await http.get<{ hasEsData: boolean }>( '/internal/data_views/has_es_data', - { - version: '1', - } + { version: '1' } ); + return hasEsData; } catch (e) { + if ( + this.isResponseError(e) && + e.body?.statusCode === 504 && + e.body?.attributes?.failureReason === HasEsDataFailureReason.remoteDataTimeout + ) { + onRemoteDataTimeout(e.body); + + // In the case of a remote cluster timeout, + // we can't be sure if there is data or not, + // so just assume there is + return true; + } + // fallback to previous implementation return hasESDataViaResolveIndex(); } }; + const showRemoteDataTimeoutToast = () => + core.notifications.toasts.addDanger({ + title: i18n.translate('dataViews.hasData.remoteDataTimeoutTitle', { + defaultMessage: 'Remote cluster timeout', + }), + text: i18n.translate('dataViews.hasData.remoteDataTimeoutText', { + defaultMessage: + 'Checking for data on remote clusters timed out. One or more remote clusters may be unavailable.', + }), + }); + return { /** * Check to see if ES data exists */ - hasESData: async (): Promise => { + hasESData: async ({ + onRemoteDataTimeout = showRemoteDataTimeoutToast, + }: HasEsDataParams = {}): Promise => { if (callResolveCluster) { - return hasESDataViaResolveCluster(); + return hasESDataViaResolveCluster(onRemoteDataTimeout); } return hasESDataViaResolveIndex(); }, @@ -82,6 +121,9 @@ export class HasData { // ES Data + private isResponseError = (e: Error): e is IHttpFetchError => + isHttpFetchError(e) && isObject(e.body) && 'message' in e.body && 'statusCode' in e.body; + private responseToItemArray = (response: IndicesResponse): IndicesResponseModified[] => { const { indices = [], aliases = [] } = response; const source: IndicesResponseModified[] = []; diff --git a/src/plugins/data_views/server/index.ts b/src/plugins/data_views/server/index.ts index d72a50d20e31c..143bea2ba5d51 100644 --- a/src/plugins/data_views/server/index.ts +++ b/src/plugins/data_views/server/index.ts @@ -47,7 +47,6 @@ const configSchema = schema.object({ schema.boolean({ defaultValue: false }), schema.never() ), - dataTiersExcludedForFields: schema.conditional( schema.contextRef('serverless'), true, @@ -60,6 +59,7 @@ const configSchema = schema.object({ schema.boolean({ defaultValue: false }), schema.boolean({ defaultValue: true }) ), + hasEsDataTimeout: schema.number({ defaultValue: 5000 }), }); type ConfigType = TypeOf; diff --git a/src/plugins/data_views/server/plugin.ts b/src/plugins/data_views/server/plugin.ts index 8decac6c36b1f..9e79da893949a 100644 --- a/src/plugins/data_views/server/plugin.ts +++ b/src/plugins/data_views/server/plugin.ts @@ -63,9 +63,11 @@ export class DataViewsServerPlugin registerRoutes({ http: core.http, + logger: this.logger, getStartServices: core.getStartServices, isRollupsEnabled: () => this.rollupsEnabled, dataViewRestCounter, + hasEsDataTimeout: config.hasEsDataTimeout, }); expressions.registerFunction(getIndexPatternLoad({ getStartServices: core.getStartServices })); diff --git a/src/plugins/data_views/server/rest_api_routes/internal/has_es_data.test.ts b/src/plugins/data_views/server/rest_api_routes/internal/has_es_data.test.ts new file mode 100644 index 0000000000000..7ca07d25bf773 --- /dev/null +++ b/src/plugins/data_views/server/rest_api_routes/internal/has_es_data.test.ts @@ -0,0 +1,319 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +import type { MockedKeys } from '@kbn/utility-types-jest'; +import { IKibanaResponse, Logger, RequestHandlerContext } from '@kbn/core/server'; +import { httpServerMock } from '@kbn/core/server/mocks'; +import { createHandler, crossClusterPatterns, patterns } from './has_es_data'; +import { loggerMock } from '@kbn/logging-mocks'; + +const mockEsDataTimeout = 5000; + +describe('has_es_data route', () => { + let mockLogger: MockedKeys; + + beforeEach(() => { + mockLogger = loggerMock.create(); + }); + + it('should return hasEsData: true if there are matching local indices', async () => { + const mockESClient = { + indices: { + resolveCluster: jest.fn().mockResolvedValue({ + local: { matching_indices: true }, + }), + }, + }; + const mockContext = { + core: { + elasticsearch: { client: { asCurrentUser: mockESClient } }, + }, + } as unknown as RequestHandlerContext; + const mockRequest = httpServerMock.createKibanaRequest(); + const mockResponse = httpServerMock.createResponseFactory(); + jest + .spyOn(mockResponse, 'ok') + .mockImplementation((params) => params as unknown as IKibanaResponse); + const handler = createHandler(mockLogger, mockEsDataTimeout); + const response = await handler(mockContext, mockRequest, mockResponse); + expect(mockESClient.indices.resolveCluster).toBeCalledTimes(1); + expect(mockESClient.indices.resolveCluster).toBeCalledWith( + { + name: patterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockResponse.ok).toBeCalledTimes(1); + expect(mockResponse.ok).toBeCalledWith({ body: { hasEsData: true } }); + expect(response).toEqual({ body: { hasEsData: true } }); + }); + + it('should return hasEsData: true if there are no matching local indices but matching remote indices', async () => { + const mockESClient = { + indices: { + resolveCluster: jest + .fn() + .mockImplementation(({ name }) => + name === patterns + ? { local: { matching_indices: false } } + : name === crossClusterPatterns + ? { remote: { matching_indices: true } } + : {} + ), + }, + }; + const mockContext = { + core: { + elasticsearch: { client: { asCurrentUser: mockESClient } }, + }, + } as unknown as RequestHandlerContext; + const mockRequest = httpServerMock.createKibanaRequest(); + const mockResponse = httpServerMock.createResponseFactory(); + jest + .spyOn(mockResponse, 'ok') + .mockImplementation((params) => params as unknown as IKibanaResponse); + const handler = createHandler(mockLogger, mockEsDataTimeout); + const response = await handler(mockContext, mockRequest, mockResponse); + expect(mockESClient.indices.resolveCluster).toBeCalledTimes(2); + expect(mockESClient.indices.resolveCluster).toHaveBeenNthCalledWith( + 1, + { + name: patterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockESClient.indices.resolveCluster).toHaveBeenNthCalledWith( + 2, + { + name: crossClusterPatterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockResponse.ok).toBeCalledTimes(1); + expect(mockResponse.ok).toBeCalledWith({ body: { hasEsData: true } }); + expect(response).toEqual({ body: { hasEsData: true } }); + }); + + it('should return hasEsData: false if there are no matching local or remote indices', async () => { + const mockESClient = { + indices: { + resolveCluster: jest.fn().mockResolvedValue({ + local: { matching_indices: false }, + remote: { matching_indices: false }, + }), + }, + }; + const mockContext = { + core: { + elasticsearch: { client: { asCurrentUser: mockESClient } }, + }, + } as unknown as RequestHandlerContext; + const mockRequest = httpServerMock.createKibanaRequest(); + const mockResponse = httpServerMock.createResponseFactory(); + jest + .spyOn(mockResponse, 'ok') + .mockImplementation((params) => params as unknown as IKibanaResponse); + const handler = createHandler(mockLogger, mockEsDataTimeout); + const response = await handler(mockContext, mockRequest, mockResponse); + expect(mockESClient.indices.resolveCluster).toBeCalledTimes(2); + expect(mockESClient.indices.resolveCluster).toHaveBeenNthCalledWith( + 1, + { + name: patterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockESClient.indices.resolveCluster).toHaveBeenNthCalledWith( + 2, + { + name: crossClusterPatterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockResponse.ok).toBeCalledTimes(1); + expect(mockResponse.ok).toBeCalledWith({ body: { hasEsData: false } }); + expect(response).toEqual({ body: { hasEsData: false } }); + }); + + it('should return a 504 response and log a warning if the local data request times out', async () => { + const mockESClient = { + indices: { + resolveCluster: jest.fn().mockRejectedValue({ name: 'TimeoutError' }), + }, + }; + const mockContext = { + core: { + elasticsearch: { client: { asCurrentUser: mockESClient } }, + }, + } as unknown as RequestHandlerContext; + const mockRequest = httpServerMock.createKibanaRequest(); + const mockResponse = httpServerMock.createResponseFactory(); + jest + .spyOn(mockResponse, 'customError') + .mockImplementation((params) => params as unknown as IKibanaResponse); + const handler = createHandler(mockLogger, mockEsDataTimeout); + const response = await handler(mockContext, mockRequest, mockResponse); + expect(mockESClient.indices.resolveCluster).toBeCalledTimes(1); + expect(mockESClient.indices.resolveCluster).toBeCalledWith( + { + name: patterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockResponse.customError).toBeCalledTimes(1); + expect(mockResponse.customError).toBeCalledWith({ + statusCode: 504, + body: { + message: 'Timeout while checking for Elasticsearch data', + attributes: { failureReason: 'local_data_timeout' }, + }, + }); + expect(response).toEqual({ + statusCode: 504, + body: { + message: 'Timeout while checking for Elasticsearch data', + attributes: { failureReason: 'local_data_timeout' }, + }, + }); + expect(mockLogger.warn).toBeCalledTimes(1); + expect(mockLogger.warn).toBeCalledWith( + 'Timeout while checking for Elasticsearch data: local_data_timeout. Current timeout value is 5000ms. ' + + 'Use "data_views.hasEsDataTimeout" in kibana.yml to change it, or set to 0 to disable timeouts.' + ); + }); + + it('should return a 504 response and log a warning if the remote data request times out', async () => { + const mockESClient = { + indices: { + resolveCluster: jest.fn().mockImplementation(({ name }) => { + if (name === patterns) { + return { local: { matching_indices: false } }; + } + + if (name === crossClusterPatterns) { + // eslint-disable-next-line no-throw-literal + throw { name: 'TimeoutError' }; + } + + return {}; + }), + }, + }; + const mockContext = { + core: { + elasticsearch: { client: { asCurrentUser: mockESClient } }, + }, + } as unknown as RequestHandlerContext; + const mockRequest = httpServerMock.createKibanaRequest(); + const mockResponse = httpServerMock.createResponseFactory(); + jest + .spyOn(mockResponse, 'customError') + .mockImplementation((params) => params as unknown as IKibanaResponse); + const handler = createHandler(mockLogger, mockEsDataTimeout); + const response = await handler(mockContext, mockRequest, mockResponse); + expect(mockESClient.indices.resolveCluster).toBeCalledTimes(2); + expect(mockESClient.indices.resolveCluster).toHaveBeenNthCalledWith( + 1, + { + name: patterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockESClient.indices.resolveCluster).toHaveBeenNthCalledWith( + 2, + { + name: crossClusterPatterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockResponse.customError).toBeCalledTimes(1); + expect(mockResponse.customError).toBeCalledWith({ + statusCode: 504, + body: { + message: 'Timeout while checking for Elasticsearch data', + attributes: { failureReason: 'remote_data_timeout' }, + }, + }); + expect(response).toEqual({ + statusCode: 504, + body: { + message: 'Timeout while checking for Elasticsearch data', + attributes: { failureReason: 'remote_data_timeout' }, + }, + }); + expect(mockLogger.warn).toBeCalledTimes(1); + expect(mockLogger.warn).toBeCalledWith( + 'Timeout while checking for Elasticsearch data: remote_data_timeout. Current timeout value is 5000ms. ' + + 'Use "data_views.hasEsDataTimeout" in kibana.yml to change it, or set to 0 to disable timeouts.' + ); + }); + + it('should return a 500 response and log an error if the request fails for an unknown reason', async () => { + const someError = new Error('Some error'); + const mockESClient = { + indices: { + resolveCluster: jest.fn().mockRejectedValue(someError), + }, + }; + const mockContext = { + core: { + elasticsearch: { client: { asCurrentUser: mockESClient } }, + }, + } as unknown as RequestHandlerContext; + const mockRequest = httpServerMock.createKibanaRequest(); + const mockResponse = httpServerMock.createResponseFactory(); + jest + .spyOn(mockResponse, 'customError') + .mockImplementation((params) => params as unknown as IKibanaResponse); + const handler = createHandler(mockLogger, mockEsDataTimeout); + const response = await handler(mockContext, mockRequest, mockResponse); + expect(mockESClient.indices.resolveCluster).toBeCalledTimes(1); + expect(mockESClient.indices.resolveCluster).toBeCalledWith( + { + name: patterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: mockEsDataTimeout } + ); + expect(mockResponse.customError).toBeCalledTimes(1); + expect(mockResponse.customError).toBeCalledWith({ + statusCode: 500, + body: { + message: 'Error while checking for Elasticsearch data', + attributes: { failureReason: 'unknown' }, + }, + }); + expect(response).toEqual({ + statusCode: 500, + body: { + message: 'Error while checking for Elasticsearch data', + attributes: { failureReason: 'unknown' }, + }, + }); + expect(mockLogger.error).toBeCalledTimes(1); + expect(mockLogger.error).toBeCalledWith(someError); + }); +}); diff --git a/src/plugins/data_views/server/rest_api_routes/internal/has_es_data.ts b/src/plugins/data_views/server/rest_api_routes/internal/has_es_data.ts index 4f3fb9f19a6ff..72b2e508ba529 100644 --- a/src/plugins/data_views/server/rest_api_routes/internal/has_es_data.ts +++ b/src/plugins/data_views/server/rest_api_routes/internal/has_es_data.ts @@ -7,34 +7,124 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -import { IRouter, RequestHandlerContext } from '@kbn/core/server'; -import type { VersionedRoute } from '@kbn/core-http-server'; +import type { ElasticsearchClient, IRouter, Logger, RequestHandlerContext } from '@kbn/core/server'; +import type { KibanaResponseFactory, VersionedRoute } from '@kbn/core-http-server'; import { schema } from '@kbn/config-schema'; -import { DEFAULT_ASSETS_TO_IGNORE } from '../../../common'; +import { DEFAULT_ASSETS_TO_IGNORE, HasEsDataFailureReason } from '../../../common'; type Handler = Parameters['addVersion']>[1]; -const patterns = ['*', '-.*'].concat( +export const patterns = ['*', '-.*'].concat( DEFAULT_ASSETS_TO_IGNORE.DATA_STREAMS_TO_IGNORE.map((ds) => `-${ds}`) ); -const crossClusterPatterns = patterns.map((ds) => `*:${ds}`); +export const crossClusterPatterns = patterns.map((ds) => `*:${ds}`); -export const handler: Handler = async (ctx: RequestHandlerContext, req, res) => { - const core = await ctx.core; - const elasticsearchClient = core.elasticsearch.client.asCurrentUser; - const response = await elasticsearchClient.indices.resolveCluster({ - name: patterns.concat(crossClusterPatterns), - allow_no_indices: true, - ignore_unavailable: true, - }); +export const createHandler = + (parentLogger: Logger, hasEsDataTimeout: number): Handler => + async (ctx, _, res) => { + const logger = parentLogger.get('hasEsData'); + const core = await ctx.core; + const elasticsearchClient = core.elasticsearch.client.asCurrentUser; + const commonParams: Omit = { + elasticsearchClient, + logger, + res, + hasEsDataTimeout, + }; - const hasEsData = !!Object.values(response).find((cluster) => cluster.matching_indices); + const localDataResponse = await hasEsData({ + ...commonParams, + matchPatterns: patterns, + timeoutReason: HasEsDataFailureReason.localDataTimeout, + }); - return res.ok({ body: { hasEsData } }); + if (localDataResponse) { + return localDataResponse; + } + + const remoteDataResponse = await hasEsData({ + ...commonParams, + matchPatterns: crossClusterPatterns, + timeoutReason: HasEsDataFailureReason.remoteDataTimeout, + }); + + if (remoteDataResponse) { + return remoteDataResponse; + } + + return res.ok({ body: { hasEsData: false } }); + }; + +interface HasEsDataParams { + elasticsearchClient: ElasticsearchClient; + logger: Logger; + res: KibanaResponseFactory; + matchPatterns: string[]; + hasEsDataTimeout: number; + timeoutReason: HasEsDataFailureReason; +} + +const timeoutMessage = 'Timeout while checking for Elasticsearch data'; +const errorMessage = 'Error while checking for Elasticsearch data'; + +const hasEsData = async ({ + elasticsearchClient, + logger, + res, + matchPatterns, + hasEsDataTimeout, + timeoutReason, +}: HasEsDataParams) => { + try { + const response = await elasticsearchClient.indices.resolveCluster( + { + name: matchPatterns, + allow_no_indices: true, + ignore_unavailable: true, + }, + { requestTimeout: hasEsDataTimeout === 0 ? undefined : hasEsDataTimeout } + ); + + const hasData = Object.values(response).some((cluster) => cluster.matching_indices); + + if (hasData) { + return res.ok({ body: { hasEsData: true } }); + } + } catch (e) { + if (e.name === 'TimeoutError') { + const warningMessage = + `${timeoutMessage}: ${timeoutReason}. Current timeout value is ${hasEsDataTimeout}ms. ` + + `Use "data_views.hasEsDataTimeout" in kibana.yml to change it, or set to 0 to disable timeouts.`; + + logger.warn(warningMessage); + + return res.customError({ + statusCode: 504, + body: { + message: timeoutMessage, + attributes: { failureReason: timeoutReason }, + }, + }); + } + + logger.error(e); + + return res.customError({ + statusCode: 500, + body: { + message: errorMessage, + attributes: { failureReason: HasEsDataFailureReason.unknown }, + }, + }); + } }; -export const registerHasEsDataRoute = (router: IRouter): void => { +export const registerHasEsDataRoute = ( + router: IRouter, + logger: Logger, + hasEsDataTimeout: number +): void => { router.versioned .get({ path: '/internal/data_views/has_es_data', @@ -51,9 +141,18 @@ export const registerHasEsDataRoute = (router: IRouter): void => { hasEsData: schema.boolean(), }), }, + 400: { + body: () => + schema.object({ + message: schema.string(), + attributes: schema.object({ + failureReason: schema.string(), + }), + }), + }, }, }, }, - handler + createHandler(logger, hasEsDataTimeout) ); }; diff --git a/src/plugins/data_views/server/routes.ts b/src/plugins/data_views/server/routes.ts index e5803423d819e..9e8501f928f14 100644 --- a/src/plugins/data_views/server/routes.ts +++ b/src/plugins/data_views/server/routes.ts @@ -7,8 +7,8 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -import { HttpServiceSetup, StartServicesAccessor } from '@kbn/core/server'; -import { UsageCounter } from '@kbn/usage-collection-plugin/server'; +import type { HttpServiceSetup, Logger, StartServicesAccessor } from '@kbn/core/server'; +import type { UsageCounter } from '@kbn/usage-collection-plugin/server'; import { routes } from './rest_api_routes/public'; import type { DataViewsServerPluginStart, DataViewsServerPluginStartDependencies } from './types'; @@ -20,19 +20,23 @@ import { registerFields } from './rest_api_routes/internal/fields'; interface RegisterRoutesArgs { http: HttpServiceSetup; + logger: Logger; getStartServices: StartServicesAccessor< DataViewsServerPluginStartDependencies, DataViewsServerPluginStart >; isRollupsEnabled: () => boolean; dataViewRestCounter?: UsageCounter; + hasEsDataTimeout: number; } export function registerRoutes({ http, + logger, getStartServices, - dataViewRestCounter, isRollupsEnabled, + dataViewRestCounter, + hasEsDataTimeout, }: RegisterRoutesArgs) { const router = http.createRouter(); @@ -42,5 +46,5 @@ export function registerRoutes({ registerFieldForWildcard(router, getStartServices, isRollupsEnabled); registerFields(router, getStartServices, isRollupsEnabled); registerHasDataViewsRoute(router); - registerHasEsDataRoute(router); + registerHasEsDataRoute(router, logger, hasEsDataTimeout); } diff --git a/src/plugins/data_views/tsconfig.json b/src/plugins/data_views/tsconfig.json index 312de968d6408..45992b3548f8e 100644 --- a/src/plugins/data_views/tsconfig.json +++ b/src/plugins/data_views/tsconfig.json @@ -34,6 +34,9 @@ "@kbn/core-saved-objects-server", "@kbn/logging", "@kbn/crypto-browser", + "@kbn/core-http-browser", + "@kbn/core-http-browser-internal", + "@kbn/logging-mocks", ], "exclude": [ "target/**/*",