Skip to content

Commit

Permalink
Debug lockup
Browse files: browse the repository at this point in the history
  • Loading branch information
droberts195 committed Jan 3, 2024
1 parent 5a815c5 commit 0d2ad42
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@ void loadQueuedModels() {
try {
deploymentManager.startDeployment(loadingTask, listener);
// This needs to be synchronous here in the utility thread to keep queueing order
logger.info("before get deployed task");
TrainedModelDeploymentTask deployedTask = listener.actionGet();
logger.info("after get deployed task");
// kicks off asynchronous cluster state update
handleLoadSuccess(deployedTask);
} catch (Exception ex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,11 @@ public void startDeployment(TrainedModelDeploymentTask task, ActionListener<Trai

ActionListener<TrainedModelDeploymentTask> failedDeploymentListener = ActionListener.wrap(finalListener::onResponse, failure -> {
ProcessContext failedContext = processContextByAllocation.remove(task.getId());
logger.info("failedContext is " + failedContext);
if (failedContext != null) {
failedContext.forcefullyStopProcess();
}
logger.info("before final listener");
finalListener.onFailure(failure);
});

Expand Down Expand Up @@ -205,7 +207,9 @@ public void startDeployment(TrainedModelDeploymentTask task, ActionListener<Trai

// Failure callback: hands the exception on to failedDeploymentListener unchanged.
// An info-level log line is written immediately before and after the delegation so
// the logs show whether the delegated call was entered and whether it returned.
@Override
public void onFailure(Exception e) {
logger.info("before failedDeploymentListener.onFailure");
failedDeploymentListener.onFailure(e);
// Reached only if the call above returns normally (it neither blocks forever nor throws).
logger.info("after failedDeploymentListener.onFailure");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public NativePyTorchProcess createProcess(
process.start(executorService);
} catch (IOException | EsRejectedExecutionException e) {
String msg = "Failed to connect to pytorch process for job " + task.getDeploymentId();
logger.error(msg);
logger.error(msg, e);
try {
IOUtils.close(process);
} catch (IOException ioe) {
Expand Down

0 comments on commit 0d2ad42

Please sign in to comment.