From c6941a1a7e7bff58744d9955c4d243fe9e615d7a Mon Sep 17 00:00:00 2001 From: Tingluo Huang Date: Tue, 21 Jun 2016 23:06:38 -0400 Subject: [PATCH] Cancellation flow on Linux. --- src/Agent.Listener/JobDispatcher.cs | 41 +++++-- src/Agent.Listener/_project.json | 1 + src/Agent.Worker/_project.json | 1 + .../ProcessInvoker.cs | 101 ++++++++++++------ .../_project.json | 1 + src/Test/L0/ConstantGenerationL0.cs | 1 + src/Test/L0/Util/ApiUtilL0.cs | 1 + src/Test/_project.json | 1 + 8 files changed, 105 insertions(+), 43 deletions(-) diff --git a/src/Agent.Listener/JobDispatcher.cs b/src/Agent.Listener/JobDispatcher.cs index f95ac97762..4592058142 100644 --- a/src/Agent.Listener/JobDispatcher.cs +++ b/src/Agent.Listener/JobDispatcher.cs @@ -6,6 +6,7 @@ using System.IO; using System.Threading; using System.Threading.Tasks; +using Microsoft.VisualStudio.Services.WebApi; namespace Microsoft.VisualStudio.Services.Agent.Listener { @@ -478,6 +479,11 @@ private async Task RenewJobRequestAsync(int poolId, long requestId, Guid lockTok return; } } + else + { + Trace.Error("Catch exception during renew agent jobrequest."); + Trace.Error(ex); + } // retry TimeSpan remainingTime = TimeSpan.Zero; @@ -514,20 +520,33 @@ private async Task RenewJobRequestAsync(int poolId, long requestId, Guid lockTok private async Task CompleteJobRequestAsync(int poolId, JobRequestMessage message, Guid lockToken, TaskResult result) { var agentServer = HostContext.GetService(); - try + bool retrying = false; + while (true) { - using (var csFinishRequest = new CancellationTokenSource(ChannelTimeout)) + try { - await agentServer.FinishAgentRequestAsync(poolId, message.RequestId, lockToken, DateTime.UtcNow, result, csFinishRequest.Token); + using (var csFinishRequest = new CancellationTokenSource(ChannelTimeout)) + { + await agentServer.FinishAgentRequestAsync(poolId, message.RequestId, lockToken, DateTime.UtcNow, result, csFinishRequest.Token); + } + break; + } + catch (TaskAgentJobNotFoundException) + { + Trace.Info("TaskAgentJobNotFoundException received, job is no longer valid."); + break; + } + catch (TaskAgentJobTokenExpiredException) + { + Trace.Info("TaskAgentJobTokenExpiredException received, job is no longer valid."); + break; + } + catch (VssServiceResponseException ex) when (!retrying && ex.InnerException != null && ex.InnerException is ArgumentNullException) + { + Trace.Error("Retry on ArgumentNullException due a dotnet core bug in Linux/Mac."); + Trace.Error(ex); + retrying = true; } - } - catch (TaskAgentJobNotFoundException) - { - Trace.Info("TaskAgentJobNotFoundException received, job is no longer valid."); - } - catch (TaskAgentJobTokenExpiredException) - { - Trace.Info("TaskAgentJobTokenExpiredException received, job is no longer valid."); } // This should be the last thing to run so we don't notify external parties until actually finished diff --git a/src/Agent.Listener/_project.json b/src/Agent.Listener/_project.json index 7f20a686be..d51d91de85 100644 --- a/src/Agent.Listener/_project.json +++ b/src/Agent.Listener/_project.json @@ -56,6 +56,7 @@ "runtimes": { "win7-x64": { }, "ubuntu.14.04-x64": { }, + "ubuntu.16.04-x64": { }, "centos.7-x64": { }, "rhel.7.2-x64": { }, "osx.10.11-x64": { } diff --git a/src/Agent.Worker/_project.json b/src/Agent.Worker/_project.json index 775ed67139..90ca47629f 100644 --- a/src/Agent.Worker/_project.json +++ b/src/Agent.Worker/_project.json @@ -55,6 +55,7 @@ "runtimes": { "win7-x64": { }, "ubuntu.14.04-x64": { }, + "ubuntu.16.04-x64": { }, "centos.7-x64": { }, "rhel.7.2-x64": { }, "osx.10.11-x64": { } diff --git a/src/Microsoft.VisualStudio.Services.Agent/ProcessInvoker.cs b/src/Microsoft.VisualStudio.Services.Agent/ProcessInvoker.cs index 0f2ecac789..10b95e8c56 100644 --- a/src/Microsoft.VisualStudio.Services.Agent/ProcessInvoker.cs +++ b/src/Microsoft.VisualStudio.Services.Agent/ProcessInvoker.cs @@ -51,6 +51,8 @@ public sealed class ProcessInvoker : AgentService, IProcessInvoker private readonly TaskCompletionSource _processExitedCompletionSource = new TaskCompletionSource(); private readonly ConcurrentQueue _errorData = new ConcurrentQueue(); private readonly ConcurrentQueue _outputData = new ConcurrentQueue(); + private readonly TimeSpan _sigintTimeout = TimeSpan.FromSeconds(10); + private readonly TimeSpan _sigtermTimeout = TimeSpan.FromSeconds(5); public event EventHandler OutputDataReceived; public event EventHandler ErrorDataReceived; @@ -234,11 +236,40 @@ private void ProcessOutput() private async Task CancelAndKillProcessTree() { ArgUtil.NotNull(_proc, nameof(_proc)); + bool sigint_succeed = await SendSIGINT(_sigintTimeout); + if (sigint_succeed) + { + Trace.Info("Process cancelled successfully through Ctrl+C/SIGINT."); + return; + } + + bool sigterm_succeed = await SendSIGTERM(_sigtermTimeout); + if (sigterm_succeed) + { + Trace.Info("Process terminate successfully through Ctrl+Break/SIGTERM."); + return; + } + + Trace.Info("Kill entire process tree since both cancel and terminate signal has been ignored by the target process."); + KillProcessTree(); + } + + private async Task SendSIGINT(TimeSpan timeout) + { #if OS_WINDOWS - await WindowsCancelAndKillProcessTree(); + return await SendCtrlSignal(ConsoleCtrlEvent.CTRL_C, timeout); #else - await NixCancelAndKillProcessTree(); -#endif + return await SendSignal(Signals.SIGINT, timeout); +#endif + } + + private async Task SendSIGTERM(TimeSpan timeout) + { +#if OS_WINDOWS + return await SendCtrlSignal(ConsoleCtrlEvent.CTRL_BREAK, timeout); +#else + return await SendSignal(Signals.SIGTERM, timeout); +#endif } private void ProcessExitedHandler(object sender, EventArgs e) @@ -293,26 +324,6 @@ private void KillProcessTree() } #if OS_WINDOWS - private async Task WindowsCancelAndKillProcessTree() - { - bool ctrl_c_succeed = await SendCtrlSignal(ConsoleCtrlEvent.CTRL_C, TimeSpan.FromSeconds(10)); - if (ctrl_c_succeed) - { - Trace.Info("Process cancelled successfully through Ctrl+C."); - return; - } - - bool ctrl_break_succeed = await SendCtrlSignal(ConsoleCtrlEvent.CTRL_BREAK, TimeSpan.FromSeconds(5)); - if (ctrl_break_succeed) - { - Trace.Info("Process terminate successfully through Ctrl+Break."); - return; - } - - Trace.Info("Kill entire process tree since both cancel and terminate signal has been ignored by the target process."); - KillProcessTree(); - } - private async Task SendCtrlSignal(ConsoleCtrlEvent signal, TimeSpan timeout) { Trace.Info($"Sending {signal} to process {_proc.Id}."); @@ -545,31 +556,57 @@ public uint Size // Delegate type to be used as the Handler Routine for SetConsoleCtrlHandler private delegate Boolean ConsoleCtrlDelegate(ConsoleCtrlEvent CtrlType); #else - private async Task NixCancelAndKillProcessTree() + private async Task SendSignal(Signals signal, TimeSpan timeout) { - // TODO: replace Task.Delay(1) with Send SIGINT/SIGTERM/SIGKILL - await Task.Delay(1); - KillProcessTree(); + Trace.Info($"Sending {signal} to process {_proc.Id}."); + int errorCode = kill(_proc.Id, (int)signal); + if (errorCode != 0) + { + Trace.Info($"{signal} signal doesn't fire successfully."); + Trace.Error($"Error code: {errorCode}."); + return false; + } + + Trace.Info($"Successfully send {signal} to process {_proc.Id}."); + Trace.Info($"Waiting for process exit or {timeout.TotalSeconds} seconds after {signal} signal fired."); + var completedTask = await Task.WhenAny(Task.Delay(timeout), _processExitedCompletionSource.Task); + if (completedTask == _processExitedCompletionSource.Task) + { + Trace.Info("Process exit successfully."); + return true; + } + else + { + Trace.Info($"Process did not honor {signal} signal within {timeout.TotalSeconds} seconds."); + return false; + } } private void NixKillProcessTree() { try { - // TODO: Send Ctrl+C/Break to process group. if (!_proc.HasExited) { _proc.Kill(); } } - catch (InvalidOperationException) + catch (InvalidOperationException ex) { - // InvalidOperationException can occur if process got terminated by itself between - // HasExited and Kill() calls above. + Trace.Error("Ignore InvalidOperationException during Process.Kill()."); + Trace.Error(ex); } } -#endif + private enum Signals : int + { + SIGINT = 2, + SIGTERM = 15 + } + + [DllImport("libc", SetLastError = true)] + private static extern int kill(int pid, int sig); +#endif } public sealed class ProcessExitCodeException : Exception diff --git a/src/Microsoft.VisualStudio.Services.Agent/_project.json b/src/Microsoft.VisualStudio.Services.Agent/_project.json index b7bd4da202..bd1ff1453e 100644 --- a/src/Microsoft.VisualStudio.Services.Agent/_project.json +++ b/src/Microsoft.VisualStudio.Services.Agent/_project.json @@ -54,6 +54,7 @@ "runtimes": { "win7-x64": { }, "ubuntu.14.04-x64": { }, + "ubuntu.16.04-x64": { }, "centos.7-x64": { }, "rhel.7.2-x64": { }, "osx.10.11-x64": { } diff --git a/src/Test/L0/ConstantGenerationL0.cs b/src/Test/L0/ConstantGenerationL0.cs index 913b0fd39e..9e12e0e2dc 100644 --- a/src/Test/L0/ConstantGenerationL0.cs +++ b/src/Test/L0/ConstantGenerationL0.cs @@ -15,6 +15,7 @@ public void BuildConstantGenerateSucceed() { "win7-x64", "ubuntu.14.04-x64", + "ubuntu.16.04-x64", "centos.7-x64", "rhel.7.2-x64", "osx.10.11-x64" diff --git a/src/Test/L0/Util/ApiUtilL0.cs b/src/Test/L0/Util/ApiUtilL0.cs index a24bc0ac95..b9357f5847 100644 --- a/src/Test/L0/Util/ApiUtilL0.cs +++ b/src/Test/L0/Util/ApiUtilL0.cs @@ -85,6 +85,7 @@ public void VerifyUserAgentHasPlatformInfo() { "win7-x64", "ubuntu.14.04-x64", + "ubuntu.16.04-x64", "centos.7-x64", "rhel.7.2-x64", "osx.10.11-x64" diff --git a/src/Test/_project.json b/src/Test/_project.json index 0ce6084053..14b5a7ab46 100644 --- a/src/Test/_project.json +++ b/src/Test/_project.json @@ -51,6 +51,7 @@ "runtimes": { "win7-x64": { }, "ubuntu.14.04-x64": { }, + "ubuntu.16.04-x64": { }, "centos.7-x64": { }, "rhel.7.2-x64": { }, "osx.10.11-x64": { }