From c1009f0b5228a12cc3c6e7b39a0d35b97749e3d9 Mon Sep 17 00:00:00 2001 From: Joe Schmitt Date: Fri, 1 Nov 2024 09:32:01 -0700 Subject: [PATCH] Retry on transient error --- .../RetryUtilities.cs | 12 +++++-- .../EgressTests.cs | 34 +++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/Tests/Microsoft.Diagnostics.Monitoring.TestCommon/RetryUtilities.cs b/src/Tests/Microsoft.Diagnostics.Monitoring.TestCommon/RetryUtilities.cs index d44ac1fc63d..b736d90e69f 100644 --- a/src/Tests/Microsoft.Diagnostics.Monitoring.TestCommon/RetryUtilities.cs +++ b/src/Tests/Microsoft.Diagnostics.Monitoring.TestCommon/RetryUtilities.cs @@ -27,7 +27,7 @@ public static void Retry(Action func, Func shouldRetry, ITestOu } } - public static async Task RetryAsync(Func func, Func shouldRetry, ITestOutputHelper outputHelper, int maxRetryCount = 3) + public static async Task RetryAsync(Func> func, Func shouldRetry, ITestOutputHelper outputHelper, int maxRetryCount = 3) { int attemptIteration = 0; while (true) @@ -36,13 +36,19 @@ public static async Task RetryAsync(Func func, Func shoul outputHelper.WriteLine("===== Attempt #{0} =====", attemptIteration); try { - await func(); - break; + return await func(); } catch (Exception ex) when (attemptIteration < maxRetryCount && shouldRetry(ex)) { } } } + + public static async Task RetryAsync(Func func, Func shouldRetry, ITestOutputHelper outputHelper, int maxRetryCount = 3) + => await RetryAsync(async () => + { + await func(); + return null; + }, shouldRetry, outputHelper, maxRetryCount); } } diff --git a/src/Tests/Microsoft.Diagnostics.Monitoring.Tool.FunctionalTests/EgressTests.cs b/src/Tests/Microsoft.Diagnostics.Monitoring.Tool.FunctionalTests/EgressTests.cs index 8ff9160aa35..4341bab18e4 100644 --- a/src/Tests/Microsoft.Diagnostics.Monitoring.Tool.FunctionalTests/EgressTests.cs +++ b/src/Tests/Microsoft.Diagnostics.Monitoring.Tool.FunctionalTests/EgressTests.cs @@ -235,7 +235,7 @@ await ScenarioRunner.SingleTarget( int processId = await appRunner.ProcessIdTask; OperationResponse response1 = await EgressTraceWithDelay(apiClient, processId); - OperationResponse response3 = await EgressTraceWithDelay(apiClient, processId); + OperationResponse response2 = await EgressTraceWithDelay(apiClient, processId); using HttpResponseMessage traceDirect1 = await TraceWithDelay(apiClient, processId); Assert.Equal(HttpStatusCode.OK, traceDirect1.StatusCode); @@ -253,10 +253,10 @@ await ScenarioRunner.SingleTarget( Assert.Equal(await egressDirect.Content.ReadAsStringAsync(), await traceDirect.Content.ReadAsStringAsync()); await CancelEgressOperation(apiClient, response1); - OperationResponse response4 = await EgressTraceWithDelay(apiClient, processId, delay: false); + OperationResponse response3 = await EgressTraceWithDelay(apiClient, processId, delay: false); + await CancelEgressOperation(apiClient, response2); await CancelEgressOperation(apiClient, response3); - await CancelEgressOperation(apiClient, response4); await appRunner.SendCommandAsync(TestAppScenarios.AsyncWait.Commands.Continue); }, @@ -426,9 +426,13 @@ await ScenarioRunner.SingleTarget( }); } - private static async Task TraceWithDelay(ApiClient client, int processId, bool delay = true) + private async Task TraceWithDelay(ApiClient client, int processId, bool delay = true) { - HttpResponseMessage message = await client.ApiCall(FormattableString.Invariant($"/trace?pid={processId}&durationSeconds=-1")); + HttpResponseMessage message = await RetryUtilities.RetryAsync( + func: () => client.ApiCall(FormattableString.Invariant($"/trace?pid={processId}&durationSeconds=-1")), + shouldRetry: IsTransientApiFailure, + outputHelper: _outputHelper); + if (delay) { await Task.Delay(TimeSpan.FromSeconds(1)); @@ -436,17 +440,22 @@ private static async Task TraceWithDelay(ApiClient client, return message; } - private static Task EgressDirect(ApiClient client, int processId) + private async Task EgressDirect(ApiClient client, int processId) { - return client.ApiCall(FormattableString.Invariant($"/trace?pid={processId}&egressProvider={FileProviderName}")); + return await RetryUtilities.RetryAsync( + func: () => client.ApiCall(FormattableString.Invariant($"/trace?pid={processId}&egressProvider={FileProviderName}")), + shouldRetry: IsTransientApiFailure, + outputHelper: _outputHelper); } - private static async Task EgressTraceWithDelay(ApiClient apiClient, int processId, bool delay = true) + private async Task EgressTraceWithDelay(ApiClient apiClient, int processId, bool delay = true) { try { - OperationResponse response = await apiClient.EgressTraceAsync(processId, durationSeconds: -1, FileProviderName); - return response; + return await RetryUtilities.RetryAsync( + func: () => apiClient.EgressTraceAsync(processId, durationSeconds: -1, FileProviderName), + shouldRetry: IsTransientApiFailure, + outputHelper: _outputHelper); } finally { @@ -473,6 +482,11 @@ private static void ValidateOperation(OperationStatus expected, OperationSummary Assert.Equal(expected.IsStoppable, summary.IsStoppable); } + // When the process could not be found (due to transient responsiveness issues), dotnet-monitor APIs will return a 400 status code. + private static bool IsTransientApiFailure(Exception ex) + => ex is ValidationProblemDetailsException validationException + && validationException.StatusCode == HttpStatusCode.BadRequest; + public void Dispose() { _tempDirectory.Dispose();