Skip to content

Commit

Permalink
[KB] create @kbn/product-doc-artifact-builder package (elastic#193847)
Browse files Browse the repository at this point in the history
## Summary

Related elastic#193473

Add the initial implementation of the knowledge base artifact builder. This
PR only introduces the builder script; it does not add any automation
around it.

---------

Co-authored-by: kibanamachine <[email protected]>
Co-authored-by: Elastic Machine <[email protected]>
(cherry picked from commit 1ab1add)

# Conflicts:
#	.github/CODEOWNERS
  • Loading branch information
pgayvallet committed Oct 14, 2024
1 parent 84faa5c commit 5085255
Show file tree
Hide file tree
Showing 29 changed files with 1,006 additions and 1 deletion.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -1463,6 +1463,7 @@
"@kbn/picomatcher": "link:packages/kbn-picomatcher",
"@kbn/plugin-generator": "link:packages/kbn-plugin-generator",
"@kbn/plugin-helpers": "link:packages/kbn-plugin-helpers",
"@kbn/product-doc-artifact-builder": "link:x-pack/packages/ai-infra/product-doc-artifact-builder",
"@kbn/repo-file-maps": "link:packages/kbn-repo-file-maps",
"@kbn/repo-linter": "link:packages/kbn-repo-linter",
"@kbn/repo-path": "link:packages/kbn-repo-path",
Expand Down
11 changes: 11 additions & 0 deletions scripts/build_product_doc_artifacts.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

// Load the repository's Node environment setup first.
// NOTE(review): presumably this has to run before any `@kbn/*` package is required — confirm.
require('../src/setup_node_env');
// Invoke the `runScript` entry point exported by the artifact builder package.
require('@kbn/product-doc-artifact-builder').runScript();
1 change: 1 addition & 0 deletions src/dev/precommit_hook/casing_check_config.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ export const IGNORE_DIRECTORY_GLOBS = [
'src/babel-*',
'packages/*',
'packages/core/*/*',
'x-pack/packages/ai-infra/*',
'packages/kbn-pm/src/utils/__fixtures__/*',
'packages/kbn-check-prod-native-modules-cli/integration_tests/__fixtures__/*/node_modules/*',
'x-pack/dev-tools',
Expand Down
4 changes: 3 additions & 1 deletion tsconfig.base.json
Original file line number Diff line number Diff line change
Expand Up @@ -1366,6 +1366,8 @@
"@kbn/presentation-publishing/*": ["packages/presentation/presentation_publishing/*"],
"@kbn/presentation-util-plugin": ["src/plugins/presentation_util"],
"@kbn/presentation-util-plugin/*": ["src/plugins/presentation_util/*"],
"@kbn/product-doc-artifact-builder": ["x-pack/packages/ai-infra/product-doc-artifact-builder"],
"@kbn/product-doc-artifact-builder/*": ["x-pack/packages/ai-infra/product-doc-artifact-builder/*"],
"@kbn/profiling-data-access-plugin": ["x-pack/plugins/observability_solution/profiling_data_access"],
"@kbn/profiling-data-access-plugin/*": ["x-pack/plugins/observability_solution/profiling_data_access/*"],
"@kbn/profiling-plugin": ["x-pack/plugins/observability_solution/profiling"],
Expand Down Expand Up @@ -2084,4 +2086,4 @@
"@kbn/ambient-storybook-types"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# @kbn/product-doc-artifact-builder

Script to build the knowledge base artifacts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export { runScript } from './src/command';
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Jest configuration for the @kbn/product-doc-artifact-builder package.
module.exports = {
preset: '@kbn/test/jest_node',
// Four levels up from this package's folder; `roots` below is expressed relative to it.
rootDir: '../../../..',
// Restrict the run to files located inside this package.
roots: ['<rootDir>/x-pack/packages/ai-infra/product-doc-artifact-builder'],
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"type": "shared-common",
"id": "@kbn/product-doc-artifact-builder",
"owner": "@elastic/appex-ai-infra",
"devOnly": true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "@kbn/product-doc-artifact-builder",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Computes the file name of the artifact archive for a product and version.
 *
 * @returns the lower-cased name, shaped as `kibana-kb-{productName}-{productVersion}.zip`
 */
export const getArtifactName = ({
  productName,
  productVersion,
}: {
  productName: string;
  productVersion: string;
}): string => {
  const archiveName = `kibana-kb-${productName}-${productVersion}.zip`;
  // the whole name is lower-cased, including the product name and version
  return archiveName.toLowerCase();
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Shape of the manifest produced by {@link getArtifactManifest}.
 */
export interface ArtifactManifest {
  /** Version of the manifest format. */
  formatVersion: string;
  /** Name of the product the manifest was generated for. */
  productName: string;
  /** Version of the product the manifest was generated for. */
  productVersion: string;
}

/**
 * Creates the manifest for the given product and stack version.
 *
 * The stack version is exposed as `productVersion`, and `formatVersion`
 * is always `1.0.0`.
 */
export const getArtifactManifest = ({
  productName,
  stackVersion,
}: {
  productName: string;
  stackVersion: string;
}): ArtifactManifest => ({
  formatVersion: '1.0.0',
  productName,
  productVersion: stackVersion,
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types';

/**
 * Returns the index mappings used for the artifact's documents.
 *
 * The mappings are strict (`dynamic: 'strict'`), and every `semantic_text`
 * field uses the provided inference endpoint as its `inference_id`.
 *
 * @param inferenceEndpoint - id of the inference endpoint assigned to the `semantic_text` fields
 */
export const getArtifactMappings = (inferenceEndpoint: string): MappingTypeMapping => {
  // builds a new definition object on each call, so fields never share a reference
  const semanticTextField = () => ({
    type: 'semantic_text' as const,
    inference_id: inferenceEndpoint,
  });

  return {
    dynamic: 'strict',
    properties: {
      content_title: { type: 'text' },
      content_body: semanticTextField(),
      product_name: { type: 'keyword' },
      root_type: { type: 'keyword' },
      slug: { type: 'keyword' },
      url: { type: 'keyword' },
      version: { type: 'version' },
      ai_subtitle: semanticTextField(),
      ai_summary: semanticTextField(),
      ai_questions_answered: semanticTextField(),
      ai_tags: { type: 'keyword' },
    },
  };
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Product names that are accepted, spelled the way they are found
 * in the source cluster.
 */
export const sourceProductNames: string[] = [
  'Kibana',
  'Elasticsearch',
  'Security',
  'Observability',
];
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import Path from 'path';
import { Client } from '@elastic/elasticsearch';
import { ToolingLog } from '@kbn/tooling-log';
import {
// checkConnectivity,
createTargetIndex,
extractDocumentation,
indexDocuments,
installElser,
createChunkFiles,
createArtifact,
cleanupFolders,
deleteIndex,
} from './tasks';
import type { TaskConfig } from './types';

/**
 * Creates the Elasticsearch client for the source cluster, using the
 * `sourceCluster*` url and basic-auth credentials from the task config.
 */
const getSourceClient = (config: TaskConfig) => {
  const { sourceClusterUrl, sourceClusterUsername, sourceClusterPassword } = config;

  return new Client({
    compression: true,
    nodes: [sourceClusterUrl],
    sniffOnStart: false,
    auth: { username: sourceClusterUsername, password: sourceClusterPassword },
  });
};

/**
 * Creates the Elasticsearch client for the embedding cluster, using the
 * `embeddingCluster*` url and basic-auth credentials from the task config.
 */
const getEmbeddingClient = (config: TaskConfig) => {
  const { embeddingClusterUrl, embeddingClusterUsername, embeddingClusterPassword } = config;
  // generating embeddings takes time, so requests get a 10 minute timeout
  const requestTimeoutMs = 10 * 60 * 1000;

  return new Client({
    compression: true,
    nodes: [embeddingClusterUrl],
    auth: { username: embeddingClusterUsername, password: embeddingClusterPassword },
    requestTimeout: requestTimeoutMs,
  });
};

/**
 * Entry point of the build: produces one artifact per product listed in the config.
 *
 * The build folder is cleaned before and after the run, ELSER is installed on the
 * embedding cluster, then the products are processed one after the other.
 */
export const buildArtifacts = async (config: TaskConfig) => {
  const { stackVersion, productNames, buildFolder, targetFolder } = config;

  const log = new ToolingLog({ level: 'info', writeTo: process.stdout });

  const productList = productNames.join(',');
  log.info(
    `Starting building artifacts for version=[${stackVersion}] and products=[${productList}]`
  );

  const sourceClient = getSourceClient(config);
  const embeddingClient = getEmbeddingClient(config);

  // log.info('Checking connectivity against clusters');
  // await checkConnectivity({ sourceClient, embeddingClient });

  // start from an empty build folder
  await cleanupFolders({ folders: [buildFolder] });

  log.info('Ensuring ELSER is installed on the embedding cluster');
  await installElser({ client: embeddingClient });

  // products are built sequentially, each one awaited before the next starts
  for (const productName of productNames) {
    await buildArtifact({
      productName,
      stackVersion,
      buildFolder,
      targetFolder,
      sourceClient,
      embeddingClient,
      log,
    });
  }

  // remove the intermediate files once all artifacts are built
  await cleanupFolders({ folders: [buildFolder] });
};

/**
 * Builds the artifact for a single product and stack version.
 *
 * Steps, in order:
 *  1. extract the product's documentation from the source cluster
 *  2. create a temporary target index on the embedding cluster
 *  3. index the extracted documents into that index
 *  4. create the chunk files under `{buildFolder}/{productName}`
 *  5. create the artifact in `targetFolder` from that folder
 *  6. delete the temporary index
 *
 * Once the temporary index has been created, it is deleted even when one of
 * the following steps throws, so that a failed run does not leave the index
 * behind on the embedding cluster. The original error is still propagated
 * to the caller (unless the deletion itself fails).
 */
const buildArtifact = async ({
  productName,
  stackVersion,
  buildFolder,
  targetFolder,
  embeddingClient,
  sourceClient,
  log,
}: {
  productName: string;
  stackVersion: string;
  buildFolder: string;
  targetFolder: string;
  sourceClient: Client;
  embeddingClient: Client;
  log: ToolingLog;
}) => {
  log.info(`Starting building artifact for product [${productName}] and version [${stackVersion}]`);

  const targetIndex = getTargetIndexName({ productName, stackVersion });
  // per-product folder used for the chunk files and as the artifact's input
  const productBuildFolder = Path.join(buildFolder, productName);

  const documents = await extractDocumentation({
    client: sourceClient,
    index: 'search-docs-1',
    log,
    productName,
    stackVersion,
  });

  await createTargetIndex({
    client: embeddingClient,
    indexName: targetIndex,
  });

  try {
    await indexDocuments({
      client: embeddingClient,
      index: targetIndex,
      documents,
      log,
    });

    await createChunkFiles({
      index: targetIndex,
      client: embeddingClient,
      productName,
      destFolder: productBuildFolder,
      log,
    });

    await createArtifact({
      buildFolder: productBuildFolder,
      targetFolder,
      productName,
      stackVersion,
      log,
    });
  } finally {
    // always drop the temporary index, whether the steps above succeeded or not
    await deleteIndex({
      indexName: targetIndex,
      client: embeddingClient,
      log,
    });
  }

  log.info(`Finished building artifact for product [${productName}] and version [${stackVersion}]`);
};

/**
 * Computes the name of the temporary index used while building an artifact:
 * `kb-artifact-builder-{productName}-{stackVersion}`, lower-cased.
 */
const getTargetIndexName = ({
  productName,
  stackVersion,
}: {
  productName: string;
  stackVersion: string;
}) => {
  const indexName = `kb-artifact-builder-${productName}-${stackVersion}`;
  return indexName.toLowerCase();
};
Loading

0 comments on commit 5085255

Please sign in to comment.