From 0d2ad426c83afc246f620d5cdb07f744a8bf033c Mon Sep 17 00:00:00 2001 From: David Roberts Date: Wed, 3 Jan 2024 18:33:59 +0000 Subject: [PATCH] Debug lockup --- .../assignment/TrainedModelAssignmentNodeService.java | 2 ++ .../xpack/ml/inference/deployment/DeploymentManager.java | 4 ++++ .../pytorch/process/NativePyTorchProcessFactory.java | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java index 3fac7c387b12e..8d137fa6fab98 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java @@ -209,7 +209,9 @@ void loadQueuedModels() { try { deploymentManager.startDeployment(loadingTask, listener); // This needs to be synchronous here in the utility thread to keep queueing order + logger.info("before get deployed task"); TrainedModelDeploymentTask deployedTask = listener.actionGet(); + logger.info("after get deployed task"); // kicks off asynchronous cluster state update handleLoadSuccess(deployedTask); } catch (Exception ex) { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/DeploymentManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/DeploymentManager.java index 18e89732daf21..8c541e040ee6c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/DeploymentManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/DeploymentManager.java @@ -156,9 +156,11 @@ public void startDeployment(TrainedModelDeploymentTask task, ActionListener failedDeploymentListener = ActionListener.wrap(finalListener::onResponse, failure -> { ProcessContext failedContext = processContextByAllocation.remove(task.getId()); + logger.info("failedContext is " + failedContext); if (failedContext != null) { failedContext.forcefullyStopProcess(); } + logger.info("before final listener"); finalListener.onFailure(failure); }); @@ -205,7 +207,9 @@ public void startDeployment(TrainedModelDeploymentTask task, ActionListener