Skip to content

Commit

Permalink
Send cluster status while deploying
Browse files Browse the repository at this point in the history
While the cluster is starting or reconfiguring during deployment and
redeployment, we'll send out periodic messages to the
`eu.nebulouscloud.optimiser.controller.app_state` channel of the
following form:

```
{
  "when": "2024-04-17T07:54:00.169580700Z",
  "state": "DEPLOYING",
  "clusterState": { ... }
}
```

If the `clusterState` key exists, its value will be the return value of
SAL's getCluster endpoint.  The value format is defined in the
sal-common class `Cluster`; see
https://github.com/ow2-proactive/scheduling-abstraction-layer/blob/master/sal-common/src/main/java/org/ow2/proactive/sal/model/Cluster.java

Note that the `clusterState` key may be missing from the status message;
this is the case when deployment or redeployment has been triggered but
the optimiser-controller is still calculating the necessary changes to
the cluster.
  • Loading branch information
rudi committed Dec 3, 2024
1 parent 05339ca commit 660a045
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1023,4 +1023,24 @@ public void sendAppStatus(String appID, NebulousApp.State state) {
appStatusPublisher.send(msg, appID);
}

/**
 * Broadcasts an application's state, with an auxiliary data value
 * attached.<p>
 *
 * Messages are in the same form as sent by {@link #sendAppStatus(String,
 * NebulousApp.State)} but with an additional {@code key: value} entry in
 * the status message.<p>
 *
 * If {@code value} is {@code null}, a JSON null or a missing node, there
 * is nothing to attach; in that case the plain status message is sent
 * via {@link #sendAppStatus(String, NebulousApp.State)} instead of
 * failing.
 *
 * @param appID the application id.
 * @param state the state of the application.
 * @param key the key of an additional entry in the status message.  Must
 *  not be {@code null} and must not be {@code "state"}, since the message
 *  is built with {@code Map.of}, which rejects null and duplicate keys.
 * @param value the value of an additional entry in the status message.
 *  It is converted to a {@code Map}, so it is expected to be a JSON
 *  object.
 */
public void sendAppStatus(String appID, NebulousApp.State state, String key, JsonNode value) {
    // Map.of does not accept null values, so an absent auxiliary value
    // must not reach it; fall back to the message without the extra entry.
    if (value == null || value.isNull() || value.isMissingNode()) {
        sendAppStatus(appID, state);
        return;
    }
    Map<String, Object> msg = Map.of(
        "state", state.toString(),
        key, mapper.convertValue(value, Map.class));
    appStatusPublisher.send(msg, appID);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,24 @@ public boolean setStateDeploying() {
}
}

/**
 * Publishes a DEPLOYING state message that carries the cluster status
 * reported by SAL, provided the app is currently in the DEPLOYING state.
 * In any other state no message is sent.
 *
 * @param clusterState a JSON node with the cluster status returned by
 *  the getCluster endpoint.
 * @return true if a status message was sent, false otherwise.
 */
@Synchronized
public boolean sendDeploymentStatus(JsonNode clusterState) {
    // Guard clause: only report cluster progress while deploying.
    if (state != State.DEPLOYING) {
        return false;
    }
    exnConnector.sendAppStatus(UUID, state, "clusterState", clusterState);
    return true;
}

/** Set state from DEPLOYING to RUNNING and update app cluster information.
* @return false if not in state deploying, otherwise true. */
@Synchronized
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,9 @@ public static String createNodeName(String clusterName, String componentName, in
* @param appID The application id.
* @param clusterName The name of the cluster to poll.
*/
private static boolean waitForClusterDeploymentFinished(ExnConnector conn, String appID, String clusterName) {
private static boolean waitForClusterDeploymentFinished(ExnConnector conn, NebulousApp app) {
String appID = app.getUUID();
String clusterName = app.getClusterName();
final int pollInterval = 10000; // Check status every 10s
int callsSincePrinting = 0; // number of intervals since we last logged what we're doing
int failedCalls = 0;
Expand All @@ -310,6 +312,7 @@ private static boolean waitForClusterDeploymentFinished(ExnConnector conn, Strin
if (clusterState != null) {
JsonNode jsonState = clusterState.at("/status");
status = jsonState.isMissingNode() ? null : jsonState.asText();
app.sendDeploymentStatus(clusterState);
} else {
status = null;
}
Expand Down Expand Up @@ -611,7 +614,7 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
return;
}

if (!waitForClusterDeploymentFinished(conn, appUUID, clusterName)) {
if (!waitForClusterDeploymentFinished(conn, app)) {
log.error("Error while waiting for deployCluster to finish, trying to delete cluster {} and aborting deployment",
cluster);
app.setStateFailed();
Expand Down Expand Up @@ -844,7 +847,9 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
log.info("Starting scaleout: {}", nodesToAdd);
Main.logFile("redeploy-scaleout-" + appUUID + ".json", nodesToAdd.toPrettyString());
conn.scaleOut(appUUID, clusterName, nodesToAdd);
waitForClusterDeploymentFinished(conn, appUUID, clusterName);
// TODO: check for error and set app state failed? (See the
// other call to waitForClusterDeploymentFinished)
waitForClusterDeploymentFinished(conn, app);
} else {
log.info("No nodes added, skipping scaleout");
}
Expand Down

0 comments on commit 660a045

Please sign in to comment.