Skip to content

Commit

Permalink
Add configurable timeout for node executor
Browse files Browse the repository at this point in the history
  • Loading branch information
Karl DeBisschop committed Dec 17, 2019
1 parent 564ee03 commit fa2dac6
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 10 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Features:

Configuration:

- Node executor and file copier do not require separate configuration.
- Node executor has configurable timeout.
- Authentication tokens for node executor and file copier are in password storage.
- The path for authentication tokens is specified in the node source configuration.
- Users will need to add those keys to storage in addition to entering them as password
Expand Down Expand Up @@ -55,7 +55,6 @@ Should be considered beta. Probably limited to text files.

## Known Bugs

- Node Executor fails with java.lang.ArrayIndexOutOfBoundsException after 15 minutes.
- File Copier handles only text files.

## Compatibility
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,19 @@

import javax.xml.bind.DatatypeConverter;

import com.dtolabs.rundeck.core.common.Framework;
import com.dtolabs.rundeck.core.common.INodeEntry;
import com.dtolabs.rundeck.core.execution.ExecutionContext;
import com.dtolabs.rundeck.core.execution.ExecutionListener;
import com.dtolabs.rundeck.core.execution.service.NodeExecutor;
import com.dtolabs.rundeck.core.execution.service.NodeExecutorResult;
import com.dtolabs.rundeck.core.execution.service.NodeExecutorResultImpl;
import com.dtolabs.rundeck.core.execution.utils.ResolverUtil;
import com.dtolabs.rundeck.core.execution.workflow.steps.StepFailureReason;
import com.dtolabs.rundeck.core.plugins.Plugin;
import com.dtolabs.rundeck.core.plugins.configuration.Describable;
import com.dtolabs.rundeck.core.plugins.configuration.Description;
import com.dtolabs.rundeck.core.plugins.configuration.PropertyUtil;
import com.dtolabs.rundeck.core.storage.ResourceMeta;
import com.dtolabs.rundeck.plugins.ServiceNameConstants;
import com.dtolabs.rundeck.plugins.util.DescriptionBuilder;
Expand All @@ -53,14 +56,28 @@ public class RancherNodeExecutorPlugin implements NodeExecutor, Describable {
private String accessKey;
private String secretKey;

private Framework framework;

static {
DescriptionBuilder builder = DescriptionBuilder.builder();
builder.name(RancherShared.SERVICE_PROVIDER_NAME);
builder.title("Rancher Node Executor");
builder.description("Executes a command on a remote rancher node.");

builder.property(PropertyUtil.integer(RancherShared.CONFIG_EXECUTOR_TIMEOUT, "Maximum execution time",
"Terminate execution after specified number of seconds", true, "300"));

builder.mapping(RancherShared.CONFIG_EXECUTOR_TIMEOUT, "project." + RancherShared.CONFIG_EXECUTOR_TIMEOUT);
builder.frameworkMapping(RancherShared.CONFIG_EXECUTOR_TIMEOUT,
"framework." + RancherShared.CONFIG_EXECUTOR_TIMEOUT);

DESC = builder.build();
}

/**
 * Creates the node executor and keeps a reference to the given framework.
 *
 * @param framework the Rundeck framework instance stored on this plugin
 */
public RancherNodeExecutorPlugin(Framework framework) {
this.framework = framework;
}

@Override
public Description getDescription() {
return DESC;
Expand All @@ -78,20 +95,23 @@ public NodeExecutorResult executeCommand(final ExecutionContext context, final S
}

ExecutionListener listener = context.getExecutionListener();

String url = nodeAttributes.get("execute");

Map<String, String> jobContext = context.getDataContext().get("job");
String temp = this.baseName(command, jobContext);

int timeout = ResolverUtil.resolveIntProperty(RancherShared.CONFIG_EXECUTOR_TIMEOUT, 300, node,
context.getFramework().getFrameworkProjectMgr().getFrameworkProject(context.getFrameworkProject()),
context.getFramework());
try {
RancherWebSocketListener.runJob(url, accessKey, secretKey, command, listener, temp);
RancherWebSocketListener.runJob(url, accessKey, secretKey, command, listener, temp, timeout);
} catch (IOException e) {
return NodeExecutorResultImpl.createFailure(StepFailureReason.IOFailure, e.getMessage(), node);
} catch (InterruptedException e) {
return NodeExecutorResultImpl.createFailure(StepFailureReason.Interrupted, e.getMessage(), node);
}

String[] pidFile = this.readLogFile(temp + ".pid", url).split(" +");
if (pidFile.length > 1 && Integer.parseInt(pidFile[1]) == 0) {
return NodeExecutorResultImpl.createSuccess(node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ public class RancherShared {
public static final String CONFIG_TAGS = "tags";
public static final String CONFIG_LABELS_INCLUDE_ATTRIBUTES = "labels-copied-to-attribs";
public static final String CONFIG_LABELS_INCLUDE_TAGS = "labels-copied-to-tags";
public static final String CONFIG_EXECUTOR_TIMEOUT = "rancher-node-executor-timeout";

public static final String SERVICE_PROVIDER_NAME = "rancher";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
*/
public final class RancherWebSocketListener extends WebSocketListener {

// Try to use a single HTTP client across methds.
// Try to use a single HTTP client across methods.
private OkHttpClient client;

//URL of the Rancher API end point.
Expand Down Expand Up @@ -124,15 +124,15 @@ public void onFailure(WebSocket webSocket, Throwable t, Response response) {
* @throws InterruptedException
*/
public static void runJob(String url, String accessKey, String secretKey, String[] command,
ExecutionListener listener, String temp) throws IOException, InterruptedException {
ExecutionListener listener, String temp, int timeout) throws IOException, InterruptedException {
String file = " >>" + temp + ".pid; ";
// Prefix STDERR lines with STDERR_TOK to decode in logging step.
String job = "( " + String.join(" ", command) + ") 2> >(while read line;do echo \"" + STDERR_TOK
+ " $line\";done) ;";
String remote = "printf $$" + file + job + "printf ' %s' $?" + file;
// Note that bash is required to support adding a prefix token to STDERR.
String[] cmd = { "bash", "-c", remote };
new RancherWebSocketListener().runJob(url, accessKey, secretKey, listener, cmd);
new RancherWebSocketListener().runJob(url, accessKey, secretKey, listener, cmd, timeout);
}

/**
Expand Down Expand Up @@ -181,7 +181,7 @@ public static void putFile(String url, String accessKey, String secretKey, Input
* @throws IOException
* @throws InterruptedException
*/
private void runJob(String url, String accessKey, String secretKey, ExecutionListener listener, String[] command)
private void runJob(String url, String accessKey, String secretKey, ExecutionListener listener, String[] command, int timeout)
throws IOException, InterruptedException {
client = new OkHttpClient.Builder().pingInterval(50, TimeUnit.SECONDS).callTimeout(0, TimeUnit.HOURS).build();

Expand All @@ -200,7 +200,8 @@ private void runJob(String url, String accessKey, String secretKey, ExecutionLis

// Trigger shutdown of the dispatcher's executor so process exits cleanly.
client.dispatcher().executorService().shutdown();
client.dispatcher().executorService().awaitTermination(900, TimeUnit.SECONDS);
// Wait at most `timeout` seconds (supplied by the caller) for the executor to terminate.
client.dispatcher().executorService().awaitTermination(timeout, TimeUnit.SECONDS);
}

/**
Expand Down

0 comments on commit fa2dac6

Please sign in to comment.