[8.17] [ML] Trained Model: Fix start deployment with ML autoscaling and 0 active nodes (#201256) (#201747)

# Backport

This will backport the following commits from `main` to `8.17`:
- [[ML] Trained Model: Fix start deployment with ML autoscaling and 0
active nodes (#201256)](#201256)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

## Summary

During my testing, I used the current user with all required privileges but failed to notice that, after switching to the internal `kibana_system` user, it lacked the `manage_autoscaling` privilege required for the `GET /_autoscaling/policy` API.

As a result, the `isMlAutoscalingEnabled` flag, which we rely on in the Start Deployment modal, was always set to false. This caused a bug in scenarios with zero active ML nodes, where falling back to deriving available processors from ML limits was not possible.

You can check the created deployment; it correctly identifies ML autoscaling:

<img width="670" alt="image" src="https://github.com/user-attachments/assets/ff1f835e-2b90-4b73-bea8-a49da8846fbd">

Also fixes restoring vCPU levels from the API deployment params.

### Checklist

Check that the PR satisfies the following conditions.

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios

Co-authored-by: Dima Arnautov <[email protected]>
kibanamachine and darnautov authored Nov 26, 2024
1 parent 62c1d11 commit 5f5667a
Showing 4 changed files with 315 additions and 11 deletions.
@@ -627,6 +627,297 @@ describe('DeploymentParamsMapper', () => {
},
});
});

describe('mapApiToUiDeploymentParams', () => {
it('should map API params to UI correctly', () => {
// Optimized for search
expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 16,
number_of_allocations: 2,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'medium',
});

// Lower value
expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 16,
number_of_allocations: 1,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'medium',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 8,
number_of_allocations: 2,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'medium',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 2,
number_of_allocations: 1,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'low',
});

// Exact match
expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 16,
number_of_allocations: 8,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'high',
});

// Higher value
expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 16,
number_of_allocations: 12,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'high',
});

// Lower value
expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 16,
number_of_allocations: 5,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'high',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 16,
number_of_allocations: 6,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: false,
vCPUUsage: 'high',
});

// Optimized for ingest
expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 1,
number_of_allocations: 1,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForIngest',
adaptiveResources: false,
vCPUUsage: 'low',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 1,
number_of_allocations: 2,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForIngest',
adaptiveResources: false,
vCPUUsage: 'low',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 1,
number_of_allocations: 6,
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForIngest',
adaptiveResources: false,
vCPUUsage: 'medium',
});
});

it('should map API params to UI correctly with adaptive resources', () => {
expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 8,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 2,
max_number_of_allocations: 2,
},
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: true,
vCPUUsage: 'medium',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 2,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 2,
max_number_of_allocations: 2,
},
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: true,
vCPUUsage: 'medium',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 1,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 1,
max_number_of_allocations: 1,
},
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForIngest',
adaptiveResources: true,
vCPUUsage: 'low',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 2,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 0,
max_number_of_allocations: 1,
},
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: true,
vCPUUsage: 'low',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 1,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 0,
max_number_of_allocations: 64,
},
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForIngest',
adaptiveResources: true,
vCPUUsage: 'high',
});

expect(
mapper.mapApiToUiDeploymentParams({
model_id: modelId,
deployment_id: 'test-deployment',
priority: 'normal',
threads_per_allocation: 16,
adaptive_allocations: {
enabled: true,
min_number_of_allocations: 0,
max_number_of_allocations: 12,
},
} as unknown as MlTrainedModelAssignmentTaskParametersAdaptive)
).toEqual({
deploymentId: 'test-deployment',
optimized: 'optimizedForSearch',
adaptiveResources: true,
vCPUUsage: 'high',
});
});
});
});
});
});
@@ -25,7 +25,7 @@ type VCPUBreakpoints = Record<
     max: number;
     /**
      * Static value is used for the number of vCPUs when the adaptive resources are disabled.
-     * Not allowed in certain environments.
+     * Not allowed in certain environments, Obs and Security serverless projects.
      */
     static?: number;
   }
@@ -89,6 +89,7 @@ export class DeploymentParamsMapper {
   ) {
     /**
      * Initial value can be different for serverless and ESS with autoscaling.
+     * Also not available with 0 ML active nodes.
      */
     const maxSingleMlNodeProcessors = this.mlServerLimits.max_single_ml_node_processors;
 
@@ -236,18 +237,25 @@ export class DeploymentParamsMapper {
       ? input.adaptive_allocations!.max_number_of_allocations!
       : input.number_of_allocations);
 
+    // The deployment can be created via API with a number of allocations that do not exactly match our vCPU ranges.
+    // In this case, we should find the closest vCPU range that does not exceed the max or static value of the range.
     const [vCPUUsage] = Object.entries(this.vCpuBreakpoints)
       .reverse()
-      .find(([key, val]) => vCPUs >= val.min) as [
-      DeploymentParamsUI['vCPUUsage'],
-      { min: number; max: number }
-    ];
+      .filter(([, range]) => vCPUs <= (adaptiveResources ? range.max : range.static!))
+      .reduce(
+        (prev, curr) => {
+          const prevValue = adaptiveResources ? prev[1].max : prev[1].static!;
+          const currValue = adaptiveResources ? curr[1].max : curr[1].static!;
+          return Math.abs(vCPUs - prevValue) <= Math.abs(vCPUs - currValue) ? prev : curr;
+        },
+        // in case allocation params exceed the max value of the high range
+        ['high', this.vCpuBreakpoints.high]
+      );
 
     return {
       deploymentId: input.deployment_id,
       optimized,
       adaptiveResources,
-      vCPUUsage,
+      vCPUUsage: vCPUUsage as DeploymentParamsUI['vCPUUsage'],
     };
   }
 }
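The closest-range selection introduced in the hunk above can be sketched in isolation. The breakpoint values below are made up for illustration (the real `vCpuBreakpoints` values live elsewhere in `DeploymentParamsMapper`), but the filter/reduce logic mirrors the diff: keep only ranges whose cap can hold the requested vCPUs, then pick the range whose cap is closest, falling back to `high` when the request exceeds every range.

```typescript
type VCpuRange = { min: number; max: number; static?: number };

// Hypothetical breakpoint values, for illustration only.
const vCpuBreakpoints: Record<'low' | 'medium' | 'high', VCpuRange> = {
  low: { min: 0, max: 2, static: 2 },
  medium: { min: 3, max: 16, static: 16 },
  high: { min: 17, max: 128, static: 32 },
};

function closestVCpuUsage(vCPUs: number, adaptiveResources: boolean): string {
  const [vCPUUsage] = Object.entries(vCpuBreakpoints)
    .reverse()
    // Keep only ranges whose cap (max, or static when adaptive resources are off)
    // can accommodate the requested number of vCPUs.
    .filter(([, range]) => vCPUs <= (adaptiveResources ? range.max : range.static!))
    // Of the remaining ranges, pick the one whose cap is closest to the request.
    .reduce(
      (prev, curr) => {
        const prevValue = adaptiveResources ? prev[1].max : prev[1].static!;
        const currValue = adaptiveResources ? curr[1].max : curr[1].static!;
        return Math.abs(vCPUs - prevValue) <= Math.abs(vCPUs - currValue) ? prev : curr;
      },
      // Fall back to "high" when the request exceeds every range.
      ['high', vCpuBreakpoints.high] as [string, VCpuRange]
    );
  return vCPUUsage;
}

console.log(closestVCpuUsage(4, true)); // "medium"
console.log(closestVCpuUsage(200, true)); // "high" (reduce falls through to the seed)
```

Note how this differs from the removed `.find(([key, val]) => vCPUs >= val.min)`: the old code returned the first range whose minimum was satisfied, which mis-bucketed API-created deployments whose allocation counts sat between breakpoints.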
2 changes: 1 addition & 1 deletion x-pack/plugins/ml/server/lib/node_utils.ts
@@ -33,7 +33,7 @@ export async function getMlNodeCount(client: IScopedClusterClient): Promise<MlNo
   return { count, lazyNodeCount };
 }
 
-export async function getLazyMlNodeCount(client: IScopedClusterClient) {
+export async function getLazyMlNodeCount(client: IScopedClusterClient): Promise<number> {
   const body = await client.asInternalUser.cluster.getSettings(
     {
       include_defaults: true,
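For context, a standalone sketch of what `getLazyMlNodeCount` derives from the cluster settings response. The diff only shows the `cluster.getSettings({ include_defaults: true })` call; the settings key and the flattened response shape below are assumptions for illustration, not the helper's actual implementation.

```typescript
// Simplified shape of a cluster settings response with flat keys (assumed).
interface ClusterSettingsLike {
  persistent?: Record<string, string>;
  defaults?: Record<string, string>;
}

// A lazy ML node is capacity the orchestrator can add on demand; a non-zero
// maximum implies some form of ML autoscaling is available.
function countLazyMlNodes(settings: ClusterSettingsLike): number {
  const raw =
    settings.persistent?.['xpack.ml.max_lazy_ml_nodes'] ??
    settings.defaults?.['xpack.ml.max_lazy_ml_nodes'] ??
    '0';
  return Number.parseInt(raw, 10);
}
```

The explicit `: Promise<number>` return type added in the hunk above matters because the route code that now consumes this helper compares the result with `> 0`.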
11 changes: 8 additions & 3 deletions x-pack/plugins/ml/server/routes/system.ts
@@ -14,7 +14,7 @@ import { mlLog } from '../lib/log';
 import { capabilitiesProvider } from '../lib/capabilities';
 import { spacesUtilsProvider } from '../lib/spaces_utils';
 import type { RouteInitialization, SystemRouteDeps } from '../types';
-import { getMlNodeCount } from '../lib/node_utils';
+import { getLazyMlNodeCount, getMlNodeCount } from '../lib/node_utils';
 
 /**
  * System routes
@@ -187,10 +187,15 @@ export function systemRoutes(
 
       let isMlAutoscalingEnabled = false;
       try {
-        await client.asInternalUser.autoscaling.getAutoscalingPolicy({ name: 'ml' });
+        // kibana_system user does not have the manage_autoscaling cluster privilege.
+        // perform this check as a current user.
+        await client.asCurrentUser.autoscaling.getAutoscalingPolicy({ name: 'ml' });
         isMlAutoscalingEnabled = true;
       } catch (e) {
-        // If doesn't exist, then keep the false
+        // If ml autoscaling policy doesn't exist or the user does not have privileges to fetch it,
+        // check the number of lazy ml nodes to determine if autoscaling is enabled.
+        const lazyMlNodeCount = await getLazyMlNodeCount(client);
+        isMlAutoscalingEnabled = lazyMlNodeCount > 0;
       }
 
       return response.ok({
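The control flow of the fix can be sketched against a stubbed client. The real route uses the Elasticsearch JS client's `autoscaling.getAutoscalingPolicy` and the `getLazyMlNodeCount` helper from the diff above; the `StubClient` interface here is a stand-in so the flow is runnable in isolation.

```typescript
// Stand-in for the scoped ES client used by the route (assumed shape).
interface StubClient {
  getAutoscalingPolicy(name: string): Promise<unknown>;
  getLazyMlNodeCount(): Promise<number>;
}

async function resolveMlAutoscalingEnabled(client: StubClient): Promise<boolean> {
  try {
    // Run the policy check as the current user; the internal kibana_system
    // user lacks the manage_autoscaling privilege, so it would always throw.
    await client.getAutoscalingPolicy('ml');
    return true;
  } catch {
    // Policy missing, or the user cannot read it: infer autoscaling from the
    // lazy ML node capacity instead of silently reporting false.
    const lazyMlNodeCount = await client.getLazyMlNodeCount();
    return lazyMlNodeCount > 0;
  }
}

// A user without the manage_autoscaling privilege, on a cluster with lazy ML capacity:
const demo: StubClient = {
  getAutoscalingPolicy: async () => {
    throw new Error('security_exception');
  },
  getLazyMlNodeCount: async () => 2,
};
resolveMlAutoscalingEnabled(demo).then((enabled) => console.log(enabled)); // true
```

This is the behavioral change the PR summary describes: before the fix, the privilege error made `isMlAutoscalingEnabled` unconditionally false, so the Start Deployment modal could not fall back to deriving processors from ML limits when zero ML nodes were active.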
