From e15914eb447164eee5ff337871c198c96b415fde Mon Sep 17 00:00:00 2001 From: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Date: Tue, 26 Nov 2024 23:21:05 +1100 Subject: [PATCH] [8.16] [ML] Trained Model: Fix start deployment with ML autoscaling and 0 active nodes (#201256) (#201746) # Backport This will backport the following commits from `main` to `8.16`: - [[ML] Trained Model: Fix start deployment with ML autoscaling and 0 active nodes (#201256)](https://github.com/elastic/kibana/pull/201256) ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) Co-authored-by: Dima Arnautov --- .../deployment_params_mapper.test.ts | 291 ++++++++++++++++++ .../deployment_params_mapper.ts | 22 +- x-pack/plugins/ml/server/lib/node_utils.ts | 2 +- x-pack/plugins/ml/server/routes/system.ts | 11 +- 4 files changed, 315 insertions(+), 11 deletions(-) diff --git a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts index 34875b893a867..4193251d76f3a 100644 --- a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts +++ b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts @@ -627,6 +627,297 @@ describe('DeploymentParamsMapper', () => { }, }); }); + + describe('mapApiToUiDeploymentParams', () => { + it('should map API params to UI correctly', () => { + // Optimized for search + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 16, + number_of_allocations: 2, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'medium', + }); + + // Lower value + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 16, + number_of_allocations: 1, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'medium', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 8, + number_of_allocations: 2, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'medium', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 2, + number_of_allocations: 1, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'low', + }); + + // Exact match + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 16, + number_of_allocations: 8, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'high', + }); + + // Higher value + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 16, + number_of_allocations: 12, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'high', + }); + + // Lower value + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 16, + number_of_allocations: 5, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'high', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 16, + number_of_allocations: 6, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'high', + }); + + // Optimized for ingest + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 1, + number_of_allocations: 1, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForIngest', + adaptiveResources: false, + vCPUUsage: 'low', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 1, + number_of_allocations: 2, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForIngest', + adaptiveResources: false, + vCPUUsage: 'low', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 1, + number_of_allocations: 6, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForIngest', + adaptiveResources: false, + vCPUUsage: 'medium', + }); + }); + + it('should map API params to UI correctly with adaptive resources', () => { + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 8, + adaptive_allocations: { + enabled: true, + min_number_of_allocations: 2, + max_number_of_allocations: 2, + }, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: true, + vCPUUsage: 'medium', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 2, + adaptive_allocations: { + enabled: true, + min_number_of_allocations: 2, + max_number_of_allocations: 2, + }, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: true, + vCPUUsage: 'medium', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 1, + adaptive_allocations: { + enabled: true, + min_number_of_allocations: 1, + max_number_of_allocations: 1, + }, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForIngest', + adaptiveResources: true, + vCPUUsage: 'low', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 2, + adaptive_allocations: { + enabled: true, + min_number_of_allocations: 0, + max_number_of_allocations: 1, + }, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: true, + vCPUUsage: 'low', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 1, + adaptive_allocations: { + enabled: true, + min_number_of_allocations: 0, + max_number_of_allocations: 64, + }, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForIngest', + adaptiveResources: true, + vCPUUsage: 'high', + }); + + expect( + mapper.mapApiToUiDeploymentParams({ + model_id: modelId, + deployment_id: 'test-deployment', + priority: 'normal', + threads_per_allocation: 16, + adaptive_allocations: { + enabled: true, + min_number_of_allocations: 0, + max_number_of_allocations: 12, + }, + } as unknown as MlTrainedModelAssignmentTaskParametersAdaptive) + ).toEqual({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: true, + vCPUUsage: 'high', + }); + }); + }); }); }); }); diff --git a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.ts b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.ts index ecb8a06198b1c..96ba5f0755caa 100644 --- a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.ts +++ b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.ts @@ -25,7 +25,7 @@ type VCPUBreakpoints = Record< max: number; /** * Static value is used for the number of vCPUs when the adaptive resources are disabled. - * Not allowed in certain environments. + * Not allowed in certain environments, Obs and Security serverless projects. */ static?: number; } @@ -89,6 +89,7 @@ export class DeploymentParamsMapper { ) { /** * Initial value can be different for serverless and ESS with autoscaling. + * Also not available with 0 ML active nodes. */ const maxSingleMlNodeProcessors = this.mlServerLimits.max_single_ml_node_processors; @@ -236,18 +237,25 @@ export class DeploymentParamsMapper { ? input.adaptive_allocations!.max_number_of_allocations! : input.number_of_allocations); + // The deployment can be created via API with a number of allocations that do not exactly match our vCPU ranges. + // In this case, we should find the closest vCPU range that does not exceed the max or static value of the range. const [vCPUUsage] = Object.entries(this.vCpuBreakpoints) - .reverse() - .find(([key, val]) => vCPUs >= val.min) as [ - DeploymentParamsUI['vCPUUsage'], - { min: number; max: number } - ]; + .filter(([, range]) => vCPUs <= (adaptiveResources ? range.max : range.static!)) + .reduce( + (prev, curr) => { + const prevValue = adaptiveResources ? prev[1].max : prev[1].static!; + const currValue = adaptiveResources ? curr[1].max : curr[1].static!; + return Math.abs(vCPUs - prevValue) <= Math.abs(vCPUs - currValue) ? prev : curr; + }, + // in case allocation params exceed the max value of the high range + ['high', this.vCpuBreakpoints.high] + ); return { deploymentId: input.deployment_id, optimized, adaptiveResources, - vCPUUsage, + vCPUUsage: vCPUUsage as DeploymentParamsUI['vCPUUsage'], }; } } diff --git a/x-pack/plugins/ml/server/lib/node_utils.ts b/x-pack/plugins/ml/server/lib/node_utils.ts index d8098e3c43f42..9c5c0348f03da 100644 --- a/x-pack/plugins/ml/server/lib/node_utils.ts +++ b/x-pack/plugins/ml/server/lib/node_utils.ts @@ -33,7 +33,7 @@ export async function getMlNodeCount(client: IScopedClusterClient): Promise { const body = await client.asInternalUser.cluster.getSettings( { include_defaults: true, diff --git a/x-pack/plugins/ml/server/routes/system.ts b/x-pack/plugins/ml/server/routes/system.ts index 0804a8dc02348..04062db91305c 100644 --- a/x-pack/plugins/ml/server/routes/system.ts +++ b/x-pack/plugins/ml/server/routes/system.ts @@ -14,7 +14,7 @@ import { mlLog } from '../lib/log'; import { capabilitiesProvider } from '../lib/capabilities'; import { spacesUtilsProvider } from '../lib/spaces_utils'; import type { RouteInitialization, SystemRouteDeps } from '../types'; -import { getMlNodeCount } from '../lib/node_utils'; +import { getLazyMlNodeCount, getMlNodeCount } from '../lib/node_utils'; /** * System routes @@ -174,10 +174,15 @@ export function systemRoutes( let isMlAutoscalingEnabled = false; try { - await client.asInternalUser.autoscaling.getAutoscalingPolicy({ name: 'ml' }); + // kibana_system user does not have the manage_autoscaling cluster privilege. + // perform this check as a current user. + await client.asCurrentUser.autoscaling.getAutoscalingPolicy({ name: 'ml' }); isMlAutoscalingEnabled = true; } catch (e) { - // If doesn't exist, then keep the false + // If ml autoscaling policy doesn't exist or the user does not have privileges to fetch it, + // check the number of lazy ml nodes to determine if autoscaling is enabled. + const lazyMlNodeCount = await getLazyMlNodeCount(client); + isMlAutoscalingEnabled = lazyMlNodeCount > 0; } return response.ok({