From 2d72a3811981eea4cad0dcaeade1aad2ce8ebca4 Mon Sep 17 00:00:00 2001 From: Gerard Soldevila Date: Thu, 14 Dec 2023 17:31:58 +0100 Subject: [PATCH] Inject initial "not reported" to prevent blocking `/api/status` (#173371) ## Summary Addresses https://github.com/elastic/kibana-team/issues/697 The endpoint is currently waiting for all statuses to be reported. [Recent changes](https://github.com/elastic/kibana/pull/172268) are causing the `pluginsStatus$` Observable to take longer to emit its first value. This PR injects a "status not reported" to prevent the different status Observables from blocking calls to the `/api/status` endpoint. --- .../src/routes/status.ts | 29 ++++++++++++++----- .../http/platform/status.ts | 6 ++-- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/packages/core/status/core-status-server-internal/src/routes/status.ts b/packages/core/status/core-status-server-internal/src/routes/status.ts index e06d667b4c78b..59c7aa23c51d4 100644 --- a/packages/core/status/core-status-server-internal/src/routes/status.ts +++ b/packages/core/status/core-status-server-internal/src/routes/status.ts @@ -6,7 +6,7 @@ * Side Public License, v 1. 
*/ -import { Observable, combineLatest, ReplaySubject, firstValueFrom } from 'rxjs'; +import { type Observable, combineLatest, ReplaySubject, firstValueFrom, startWith } from 'rxjs'; import { schema } from '@kbn/config-schema'; import type { PackageInfo } from '@kbn/config'; import type { PluginName } from '@kbn/core-base-common'; @@ -15,12 +15,12 @@ import type { MetricsServiceSetup } from '@kbn/core-metrics-server'; import type { CoreIncrementUsageCounter } from '@kbn/core-usage-data-server'; import type { StatusResponse } from '@kbn/core-status-common-internal'; import { - ServiceStatus, - ServiceStatusLevel, - CoreStatus, + type ServiceStatus, + type ServiceStatusLevel, + type CoreStatus, ServiceStatusLevels, } from '@kbn/core-status-common'; -import { calculateLegacyStatus, LegacyStatusInfo } from '../legacy_status'; +import { calculateLegacyStatus, type LegacyStatusInfo } from '../legacy_status'; const SNAPSHOT_POSTFIX = /-SNAPSHOT$/; @@ -61,6 +61,11 @@ export interface RedactedStatusHttpBody { }; } +const SERVICE_UNAVAILABLE_NOT_REPORTED: ServiceStatus = { + level: ServiceStatusLevels.unavailable, + summary: 'Status not yet reported', +}; + export const registerStatusRoute = ({ router, config, @@ -73,9 +78,17 @@ export const registerStatusRoute = ({ const combinedStatus$ = new ReplaySubject< [ServiceStatus, ServiceStatus, CoreStatus, Record>] >(1); - combineLatest([status.overall$, status.coreOverall$, status.core$, status.plugins$]).subscribe( - combinedStatus$ - ); + combineLatest([ + status.overall$.pipe(startWith(SERVICE_UNAVAILABLE_NOT_REPORTED)), + status.coreOverall$.pipe(startWith(SERVICE_UNAVAILABLE_NOT_REPORTED)), + status.core$.pipe( + startWith({ + elasticsearch: SERVICE_UNAVAILABLE_NOT_REPORTED, + savedObjects: SERVICE_UNAVAILABLE_NOT_REPORTED, + }) + ), + status.plugins$.pipe(startWith({})), + ]).subscribe(combinedStatus$); router.get( { diff --git a/test/server_integration/http/platform/status.ts 
b/test/server_integration/http/platform/status.ts index 8ce193c670849..df472ec36d818 100644 --- a/test/server_integration/http/platform/status.ts +++ b/test/server_integration/http/platform/status.ts @@ -35,13 +35,13 @@ export default function ({ getService }: FtrProviderContext) { // This test must come first because the timeout only applies to the initial emission it("returns a timeout for status check that doesn't emit after 30s", async () => { let aStatus = await getStatus('statusPluginA'); - expect(aStatus.level).to.eql('unavailable'); + expect(aStatus === undefined || aStatus.level === 'unavailable').to.eql(true); // Status will remain in unavailable until the custom status check times out // Keep polling until that condition ends, up to a timeout await retry.waitForWithTimeout(`Status check to timeout`, 40_000, async () => { aStatus = await getStatus('statusPluginA'); - return aStatus.summary === 'Status check timed out after 30s'; + return aStatus?.summary === 'Status check timed out after 30s'; }); expect(aStatus.level).to.eql('unavailable'); @@ -53,7 +53,7 @@ export default function ({ getService }: FtrProviderContext) { await retry.waitForWithTimeout( `statusPluginA status to update`, 5_000, - async () => (await getStatus('statusPluginA')).level === 'degraded' + async () => (await getStatus('statusPluginA'))?.level === 'degraded' ); await statusPropagation(); expect((await getStatus('statusPluginA')).level).to.eql('degraded');