From 3ad72b048949c2f6fd7c50457748d4b0e794bf2f Mon Sep 17 00:00:00 2001 From: Alex Szabo Date: Mon, 16 Sep 2024 10:43:12 +0200 Subject: [PATCH] [CI] Archive logs from `yarn es` docker runs (#189231) ## Summary The problem we're trying to solve here is to get access to `elasticsearch-serverless` logs when they're started in docker containers in the background (and `elasticsearch`, although we don't currently test against that in docker). ## Solution In essence: - we needed to remove the `--rm` flag; this allows the containers to stay present after they're done. - after this, we can run `docker logs ...` on FTR post-hooks, save these, then archive these files to buildkite - because the containers are not removed upon finishing, we need to clean up dangling containers before starting up Backporting is probably not necessary, because this is only applicable for serverless - and serverless is only supposed to run on main. Solves: https://github.com/elastic/kibana/issues/191505 (cherry picked from commit bce4a17f088969621ade141a8d19ff3fcde833b0) --- .buildkite/scripts/lifecycle/post_command.sh | 1 + packages/kbn-es/src/utils/docker.test.ts | 6 +- packages/kbn-es/src/utils/docker.ts | 43 +++++++---- .../src/utils/extract_and_archive_logs.ts | 73 +++++++++++++++++++ packages/kbn-es/src/utils/index.ts | 1 + .../functional_tests/lib/run_elasticsearch.ts | 4 +- 6 files changed, 111 insertions(+), 17 deletions(-) create mode 100644 packages/kbn-es/src/utils/extract_and_archive_logs.ts diff --git a/.buildkite/scripts/lifecycle/post_command.sh b/.buildkite/scripts/lifecycle/post_command.sh index 26578f9b9cce1..f90a4b451be1f 100755 --- a/.buildkite/scripts/lifecycle/post_command.sh +++ b/.buildkite/scripts/lifecycle/post_command.sh @@ -35,6 +35,7 @@ if [[ "$IS_TEST_EXECUTION_STEP" == "true" ]]; then buildkite-agent artifact upload 'x-pack/test/functional/failure_debug/html/*.html' buildkite-agent artifact upload '.es/**/*.hprof' buildkite-agent artifact upload 
'data/es_debug_*.tar.gz' + buildkite-agent artifact upload '.es/es*.log' if [[ $BUILDKITE_COMMAND_EXIT_STATUS -ne 0 ]]; then if [[ $BUILDKITE_TRIGGERED_FROM_BUILD_PIPELINE_SLUG == 'elasticsearch-serverless-intake' ]]; then diff --git a/packages/kbn-es/src/utils/docker.test.ts b/packages/kbn-es/src/utils/docker.test.ts index a128db03d6ad2..93dee967ee8ac 100644 --- a/packages/kbn-es/src/utils/docker.test.ts +++ b/packages/kbn-es/src/utils/docker.test.ts @@ -665,12 +665,13 @@ describe('runServerlessCluster()', () => { // docker version (1) // docker ps (1) + // docker container rm (3) // docker network create (1) // docker pull (1) // docker inspect (1) // docker run (3) // docker logs (1) - expect(execa.mock.calls).toHaveLength(9); + expect(execa.mock.calls).toHaveLength(12); }); test(`should wait for serverless nodes to return 'green' status`, async () => { @@ -806,11 +807,12 @@ describe('runDockerContainer()', () => { await expect(runDockerContainer(log, {})).resolves.toBeUndefined(); // docker version (1) // docker ps (1) + // docker container rm (3) // docker network create (1) // docker pull (1) // docker inspect (1) // docker run (1) - expect(execa.mock.calls).toHaveLength(6); + expect(execa.mock.calls).toHaveLength(9); }); }); diff --git a/packages/kbn-es/src/utils/docker.ts b/packages/kbn-es/src/utils/docker.ts index c36ed5e8a4bae..6120cc2af0561 100644 --- a/packages/kbn-es/src/utils/docker.ts +++ b/packages/kbn-es/src/utils/docker.ts @@ -113,8 +113,6 @@ const DOCKER_REGISTRY = 'docker.elastic.co'; const DOCKER_BASE_CMD = [ 'run', - '--rm', - '-t', '--net', @@ -151,8 +149,6 @@ export const ES_SERVERLESS_DEFAULT_IMAGE = `${ES_SERVERLESS_REPO_KIBANA}:${ES_SE const SHARED_SERVERLESS_PARAMS = [ 'run', - '--rm', - '--detach', '--interactive', @@ -391,7 +387,6 @@ const RETRYABLE_DOCKER_PULL_ERROR_MESSAGES = [ ]; /** - * * Pull a Docker image if needed. Ensures latest image. 
* Stops serverless from pulling the same image in each node's promise and * gives better control of log output, instead of falling back to docker run. @@ -443,6 +438,24 @@ export async function printESImageInfo(log: ToolingLog, image: string) { log.info(`Using ES image: ${imageFullName} (${revisionUrl})`); } +export async function cleanUpDanglingContainers(log: ToolingLog) { + log.info(chalk.bold('Cleaning up dangling Docker containers.')); + + try { + const serverlessContainerNames = SERVERLESS_NODES.map(({ name }) => name); + + for (const name of serverlessContainerNames) { + await execa('docker', ['container', 'rm', name, '--force']).catch(() => { + // Ignore errors if the container doesn't exist + }); + } + + log.success('Cleaned up dangling Docker containers.'); + } catch (e) { + log.error(e); + } +} + export async function detectRunningNodes( log: ToolingLog, options: ServerlessOptions | DockerOptions @@ -454,19 +467,19 @@ export async function detectRunningNodes( }, []); const { stdout } = await execa('docker', ['ps', '--quiet'].concat(namesCmd)); - const runningNodes = stdout.split(/\r?\n/).filter((s) => s); + const runningNodeIds = stdout.split(/\r?\n/).filter((s) => s); - if (runningNodes.length) { + if (runningNodeIds.length) { if (options.kill) { log.info(chalk.bold('Killing running ES Nodes.')); - await execa('docker', ['kill'].concat(runningNodes)); - - return; + await execa('docker', ['kill'].concat(runningNodeIds)); + } else { + throw createCliError( + 'ES has already been started, pass --kill to automatically stop the nodes on startup.' + ); } - - throw createCliError( - 'ES has already been started, pass --kill to automatically stop the nodes on startup.' 
- ); + } else { + log.info('No running nodes detected.'); } } @@ -484,6 +497,7 @@ async function setupDocker({ }) { await verifyDockerInstalled(log); await detectRunningNodes(log, options); + await cleanUpDanglingContainers(log); await maybeCreateDockerNetwork(log); await maybePullDockerImage(log, image); await printESImageInfo(log, image); @@ -774,6 +788,7 @@ export async function runServerlessCluster(log: ToolingLog, options: ServerlessO const volumeCmd = await setupServerlessVolumes(log, options); const portCmd = resolvePort(options); + // This is where nodes are started const nodeNames = await Promise.all( SERVERLESS_NODES.map(async (node, i) => { await runServerlessEsNode(log, { diff --git a/packages/kbn-es/src/utils/extract_and_archive_logs.ts b/packages/kbn-es/src/utils/extract_and_archive_logs.ts new file mode 100644 index 0000000000000..28dde547f6b0d --- /dev/null +++ b/packages/kbn-es/src/utils/extract_and_archive_logs.ts @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +import type { ToolingLog } from '@kbn/tooling-log'; + +import execa from 'execa'; +import Fsp from 'fs/promises'; +import { join } from 'path'; + +import { REPO_ROOT } from '@kbn/repo-info'; + +/** + * Extracts logs from Docker nodes, writes them to files, and returns the file paths. 
+ */ +export async function extractAndArchiveLogs({ + outputFolder, + log, + nodeNames, +}: { + log: ToolingLog; + nodeNames?: string[]; + outputFolder?: string; +}) { + outputFolder = outputFolder || join(REPO_ROOT, '.es'); + const logFiles: string[] = []; + + if (!nodeNames) { + const { stdout: nodeNamesString } = await execa('docker', [ + 'ps', + '-a', + '--format', + '{{.Names}}', + ]); + nodeNames = nodeNamesString.split('\n').filter(Boolean); + } + + if (!nodeNames.length) { + log.info('No Docker nodes found to extract logs from'); + return; + } else { + log.info(`Attempting to extract logs from Docker nodes to ${outputFolder}`); + } + + for (const name of nodeNames) { + const { stdout: nodeId } = await execa('docker', [ + 'ps', + '-a', + '--quiet', + '--filter', + `name=${name}`, + ]); + if (!nodeId) { + continue; + } + + const { stdout } = await execa('docker', ['logs', name]); + const targetFile = `${name}-${nodeId}.log`; + const targetPath = join(outputFolder, targetFile); + + await Fsp.writeFile(targetPath, stdout); + logFiles.push(targetFile); + + log.info(`Archived logs for ${name} to ${targetPath}`); + } + + return logFiles; +} diff --git a/packages/kbn-es/src/utils/index.ts b/packages/kbn-es/src/utils/index.ts index dd57c54d4a101..e1a51ecb44685 100644 --- a/packages/kbn-es/src/utils/index.ts +++ b/packages/kbn-es/src/utils/index.ts @@ -20,3 +20,4 @@ export * from './parse_timeout_to_ms'; export * from './docker'; export * from './serverless_file_realm'; export * from './read_roles_from_resource'; +export * from './extract_and_archive_logs'; diff --git a/packages/kbn-test/src/functional_tests/lib/run_elasticsearch.ts b/packages/kbn-test/src/functional_tests/lib/run_elasticsearch.ts index c87065fd1cdaf..724cf5bc2b25e 100644 --- a/packages/kbn-test/src/functional_tests/lib/run_elasticsearch.ts +++ b/packages/kbn-test/src/functional_tests/lib/run_elasticsearch.ts @@ -13,7 +13,7 @@ import type { ToolingLog } from '@kbn/tooling-log'; import getPort from 
'get-port'; import { REPO_ROOT } from '@kbn/repo-info'; import type { ArtifactLicense, ServerlessProjectType } from '@kbn/es'; -import { isServerlessProjectType } from '@kbn/es/src/utils'; +import { isServerlessProjectType, extractAndArchiveLogs } from '@kbn/es/src/utils'; import type { Config } from '../../functional_test_runner'; import { createTestEsCluster, esTestConfig } from '../../es'; @@ -91,6 +91,7 @@ export async function runElasticsearch( }); return async () => { await node.cleanup(); + await extractAndArchiveLogs({ outputFolder: logsDir, log }); }; } @@ -119,6 +120,7 @@ export async function runElasticsearch( return async () => { await localNode.cleanup(); await remoteNode.cleanup(); + await extractAndArchiveLogs({ outputFolder: logsDir, log }); }; }