From 660a045662fb85aa68f0e7b5588b7ca160932cc6 Mon Sep 17 00:00:00 2001 From: Rudi Schlatte Date: Thu, 14 Nov 2024 15:49:20 +0100 Subject: [PATCH] Send cluster status while deploying While the cluster is starting or reconfiguring during deployment and redeployment, we'll send out periodic messages to the `eu.nebulouscloud.optimiser.controller.app_state` channel of the following form: ``` { "when": "2024-04-17T07:54:00.169580700Z", "state": "DEPLOYING", "clusterState": { ... } } ``` If the `clusterState` key exists, its value will be the return value of SAL's getCluster endpoint. The value format is defined in the sal-common class `Cluster`; see https://github.com/ow2-proactive/scheduling-abstraction-layer/blob/master/sal-common/src/main/java/org/ow2/proactive/sal/model/Cluster.java Note that the `clusterState` key may be missing from the status message; this is the case when deployment or redeployment has been triggered but the optimiser-controller is still calculating the necessary changes to the cluster. --- .../optimiser/controller/ExnConnector.java | 20 +++++++++++++++++++ .../optimiser/controller/NebulousApp.java | 18 +++++++++++++++++ .../controller/NebulousAppDeployer.java | 11 +++++++--- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/ExnConnector.java b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/ExnConnector.java index e0cfe9b..b1aa14c 100644 --- a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/ExnConnector.java +++ b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/ExnConnector.java @@ -1023,4 +1023,24 @@ public void sendAppStatus(String appID, NebulousApp.State state) { appStatusPublisher.send(msg, appID); } + /** + * Broadcasts an application's state, with an auxiliary data value + * attached.

+ * + * Messages are in the same form as sent by {@link #sendAppStatus(String, + * NebulousApp.State)} but with an additional {@code key: value} entry in + * the status message. + * + * @param appID the application id. + * @param state the state of the application. + * @param key the key of an additional entry in the status message + * @param value the value of an additional entry in the status message + */ + public void sendAppStatus(String appID, NebulousApp.State state, String key, JsonNode value) { + Map msg = Map.of( + "state", state.toString(), + key, mapper.convertValue(value, Map.class)); + appStatusPublisher.send(msg, appID); + } + } diff --git a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java index b00c98f..fd8695f 100644 --- a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java +++ b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java @@ -389,6 +389,24 @@ public boolean setStateDeploying() { } } + /** + * If app is in the DEPLOYING state, sends a DEPLOYING state message, with + * the cluster status as reported by SAL. Otherwise does nothing. + * + * @param clusterState a JSON node with the cluster status returned by + * the getCluster endpoint. + * @return true if status message sent, false otherwise. + */ + @Synchronized + public boolean sendDeploymentStatus(JsonNode clusterState) { + if (state == State.DEPLOYING) { + exnConnector.sendAppStatus(UUID, state, "clusterState", clusterState); + return true; + } else { + return false; + } + } + /** Set state from DEPLOYING to RUNNING and update app cluster information. * @return false if not in state deploying, otherwise true. 
*/ @Synchronized diff --git a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java index 5278bbc..3365a8c 100644 --- a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java +++ b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java @@ -290,7 +290,9 @@ public static String createNodeName(String clusterName, String componentName, in * @param appID The application id. * @param clusterName The name of the cluster to poll. */ - private static boolean waitForClusterDeploymentFinished(ExnConnector conn, String appID, String clusterName) { + private static boolean waitForClusterDeploymentFinished(ExnConnector conn, NebulousApp app) { + String appID = app.getUUID(); + String clusterName = app.getClusterName(); final int pollInterval = 10000; // Check status every 10s int callsSincePrinting = 0; // number of intervals since we last logged what we're doing int failedCalls = 0; @@ -310,6 +312,7 @@ private static boolean waitForClusterDeploymentFinished(ExnConnector conn, Strin if (clusterState != null) { JsonNode jsonState = clusterState.at("/status"); status = jsonState.isMissingNode() ? 
null : jsonState.asText(); + app.sendDeploymentStatus(clusterState); } else { status = null; } @@ -611,7 +614,7 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) { return; } - if (!waitForClusterDeploymentFinished(conn, appUUID, clusterName)) { + if (!waitForClusterDeploymentFinished(conn, app)) { log.error("Error while waiting for deployCluster to finish, trying to delete cluster {} and aborting deployment", cluster); app.setStateFailed(); @@ -844,7 +847,9 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve log.info("Starting scaleout: {}", nodesToAdd); Main.logFile("redeploy-scaleout-" + appUUID + ".json", nodesToAdd.toPrettyString()); conn.scaleOut(appUUID, clusterName, nodesToAdd); - waitForClusterDeploymentFinished(conn, appUUID, clusterName); + // TODO: check for error and set app state failed? (See the + // other call to waitForClusterDeploymentFinished) + waitForClusterDeploymentFinished(conn, app); } else { log.info("No nodes added, skipping scaleout"); }