Skip to content

Commit

Permalink
Possible fix for Execution Timeout issues seen on Build Server (#748)
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanwoctopusdeploy authored Dec 19, 2023
1 parent f8f8d12 commit da5e475
Show file tree
Hide file tree
Showing 9 changed files with 118 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,11 @@ void UnPauseOrRestartPortForwarder(TentacleType tentacleType, RpcCallStage rpcCa
Func<Task<(ScriptExecutionResult, List<ProcessOutput>)>> action = async () => await executeScriptTask;

Logger.Information("Waiting for the RPC Call to start");
await Wait.For(() => rpcCallHasStarted.Value, CancellationToken);
await Wait.For(
() => rpcCallHasStarted.Value,
TimeSpan.FromSeconds(30),
() => throw new Exception("RPC call did not start")
,CancellationToken);
Logger.Information("RPC Call has start");

await Task.Delay(TimeSpan.FromSeconds(6), CancellationToken);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,10 @@ void UnPauseOrRestartPortForwarder(TentacleType tentacleType, RpcCallStage rpcCa
cancelExecutionCancellationTokenSource.Token);

Logger.Information("Waiting for the RPC Call to start");
await Wait.For(() => rpcCallHasStarted.Value, CancellationToken);
await Wait.For(() => rpcCallHasStarted.Value,
TimeSpan.FromSeconds(30),
() => throw new Exception("RPC call did not start"),
CancellationToken);
Logger.Information("RPC Call has start");

var cancellationDuration = new Stopwatch();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ public async Task WhenANetworkFailureOccurs_DuringStartScript_TheClientIsAbleToS
CancellationToken);

// Wait for the script to start.
await Wait.For(() => File.Exists(scriptHasStartFile), CancellationToken);
await Wait.For(() => File.Exists(scriptHasStartFile),
TimeSpan.FromSeconds(30),
() => throw new Exception("Script did not start"),
CancellationToken);

// Now it has started, kill active connections killing the start script request.
clientTentacle.PortForwarder.CloseExistingConnections();
Expand Down Expand Up @@ -111,7 +114,10 @@ public async Task WhenANetworkFailureOccurs_DuringGetStatus_TheClientIsAbleToSuc
async () => await clientTentacle.TentacleClient.ExecuteScript(startScriptCommand, CancellationToken, null, inMemoryLog),
CancellationToken);

await Wait.For(() => recordedUsages.For(nameof(IAsyncClientScriptServiceV2.GetStatusAsync)).LastException != null, CancellationToken);
await Wait.For(() => recordedUsages.For(nameof(IAsyncClientScriptServiceV2.GetStatusAsync)).LastException != null,
TimeSpan.FromSeconds(60),
() => throw new Exception("GetStatus did not error"),
CancellationToken);

// Let the script finish.
File.WriteAllText(waitForFile, "");
Expand Down Expand Up @@ -194,7 +200,10 @@ public async Task WhenANetworkFailureOccurs_DuringCancelScript_TheClientIsAbleTo
nameof(IAsyncClientScriptServiceV2.GetStatusAsync),
async (_, _) =>
{
await Wait.For(() => File.Exists(scriptIsRunningFlag), CancellationToken);
await Wait.For(() => File.Exists(scriptIsRunningFlag),
TimeSpan.FromSeconds(30),
() => throw new Exception("Script did not start"),
CancellationToken);
cts.Cancel();
})
.HookServiceMethod(tentacleConfigurationTestCase,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,19 @@ public async Task ScriptIsolationMutexFull_EnsuresTwoDifferentScriptsDontRunAtTh
var firstScriptExecution = Task.Run(async () => await tentacleClient.ExecuteScript(firstStartScriptCommand, CancellationToken));

// Wait for the first script to start running
await Wait.For(() => File.Exists(firstScriptStartFile), CancellationToken);
await Wait.For(() => File.Exists(firstScriptStartFile),
TimeSpan.FromSeconds(30),
() => throw new Exception("Script did not start"),
CancellationToken);
Logger.Information("First script is now running");

var secondScriptExecution = Task.Run(async () => await tentacleClient.ExecuteScript(secondStartScriptCommand, CancellationToken));

// Wait for the second script start script RPC call to return.
await Wait.For(() => recordedUsages.For(nameof(IAsyncClientScriptServiceV2.StartScriptAsync)).Completed == 2, CancellationToken);
await Wait.For(() => recordedUsages.For(nameof(IAsyncClientScriptServiceV2.StartScriptAsync)).Completed == 2,
TimeSpan.FromSeconds(60),
() => throw new Exception("Second execute script call did not complete"),
CancellationToken);

// Give Tentacle some more time to run the script (although it should not).
await Task.Delay(TimeSpan.FromSeconds(2));
Expand Down Expand Up @@ -104,12 +110,18 @@ public async Task ScriptIsolationMutexFull_IsOnlyExclusiveWhenFullAndWhenTheMute
var firstScriptExecution = Task.Run(async () => await tentacleClient.ExecuteScript(firstStartScriptCommand, CancellationToken));

// Wait for the first script to start running
await Wait.For(() => File.Exists(firstScriptStartFile), CancellationToken);
await Wait.For(() => File.Exists(firstScriptStartFile),
TimeSpan.FromSeconds(30),
() => throw new Exception("Script did not start running"),
CancellationToken);

var secondScriptExecution = Task.Run(async () => await tentacleClient.ExecuteScript(secondStartScriptCommand, CancellationToken));

// Wait for the second script to start
await Wait.For(() => File.Exists(secondScriptStart), CancellationToken);
await Wait.For(() => File.Exists(secondScriptStart),
TimeSpan.FromSeconds(30),
() => throw new Exception("Second script did not start"),
CancellationToken);
// Both scripts are now running in parallel

// Let the first script finish.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,10 @@ public async Task WhenRpcRetriesTimeOut_DuringCancelScript_TheRpcCallIsCancelled
null,
inMemoryLog);

await Wait.For(() => recordedUsages.For(nameof(IAsyncClientScriptServiceV2.GetStatusAsync)).Completed > 0, CancellationToken);
await Wait.For(() => recordedUsages.For(nameof(IAsyncClientScriptServiceV2.GetStatusAsync)).Completed > 0,
TimeSpan.FromSeconds(60),
() => throw new Exception("Script execution did not complete"),
CancellationToken);

// We cancel script execution via the cancellation token. This should trigger the CancelScript RPC call to be made
testCancellationTokenSource.Cancel();
Expand Down Expand Up @@ -519,7 +522,11 @@ public async Task WhenCancelScriptFails_AndTakesLongerThanTheRetryDuration_TheCa
inMemoryLog);

Func<Task> action = async () => await executeScriptTask;
await Wait.For(() => recordedUsages.For(nameof(IAsyncClientScriptServiceV2.GetStatusAsync)).Completed > 0, CancellationToken);
await Wait.For(
() => recordedUsages.For(nameof(IAsyncClientScriptServiceV2.GetStatusAsync)).Completed > 0,
TimeSpan.FromSeconds(60),
() => throw new Exception("Script Execution did not complete"),
CancellationToken);

// We cancel script execution via the cancellation token. This should trigger the CancelScript RPC call to be made
testCancellationTokenSource.Cancel();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,11 @@ public async Task WhenANetworkFailureOccurs_DuringGetStatus_WithATentacleThatOnl
File.WriteAllText(waitForFile, "");
var legacyTentacleClient = clientTentacle.LegacyTentacleClientBuilder().Build();

await Wait.For(async () => (await legacyTentacleClient.ScriptService.GetStatusAsync(scriptStatusRequest, new(CancellationToken)))
.State == ProcessState.Complete, CancellationToken);
await Wait.For(
async () => (await legacyTentacleClient.ScriptService.GetStatusAsync(scriptStatusRequest, new(CancellationToken))).State == ProcessState.Complete,
TimeSpan.FromSeconds(60),
() => throw new Exception("Script Execution did not complete"),
CancellationToken);

var allLogs = logs.JoinLogs();

Expand Down Expand Up @@ -198,8 +201,11 @@ public async Task WhenANetworkFailureOccurs_DuringCancelScript_WithATentacleThat
File.WriteAllText(waitForFile, "");
var legacyTentacleClient = clientTentacle.LegacyTentacleClientBuilder().Build();

await Wait.For(async () => (await legacyTentacleClient.ScriptService.GetStatusAsync(scriptStatusRequest, new(CancellationToken)))
.State == ProcessState.Complete, CancellationToken);
await Wait.For(
async () => (await legacyTentacleClient.ScriptService.GetStatusAsync(scriptStatusRequest, new(CancellationToken))).State == ProcessState.Complete,
TimeSpan.FromSeconds(60),
() => throw new Exception("Script Execution did not complete"),
CancellationToken);
}

[Test]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.IO;
using System.Threading;
using Halibut.Util;
using NUnit.Framework;
using NUnit.Framework.Interfaces;
using Octopus.Tentacle.Tests.Integration.Util;
Expand All @@ -9,7 +10,7 @@
namespace Octopus.Tentacle.Tests.Integration.Support
{
[FixtureLifeCycle(LifeCycle.InstancePerTestCase)]
public abstract class IntegrationTest
public abstract class IntegrationTest : IDisposable
{
CancellationTokenSource? cancellationTokenSource;
public CancellationToken CancellationToken { get; private set; }
Expand All @@ -20,7 +21,11 @@ public void SetUp()
{
Logger = new SerilogLoggerBuilder().Build().ForContext(GetType());
Logger.Information("Test started");
cancellationTokenSource = new CancellationTokenSource();

// Time out the cancellation token so we cancel the test if it takes too long
// The IntegrationTestTimeout attribute will also cancel the test if it takes too long, but nunit will not call TearDown on the test
// in this scenario and so will fail to cancel the cancellation token and clean up the test gracefully, potentially leading to execution timeout errors
cancellationTokenSource = new CancellationTokenSource(IntegrationTestTimeout.TestTimeoutInMilliseconds() - TimeSpan.FromSeconds(5).Milliseconds);
CancellationToken = cancellationTokenSource.Token;
}

Expand All @@ -34,6 +39,7 @@ public void TearDown()
cancellationTokenSource?.Cancel();
Logger.Information("Disposing CancellationTokenSource");
cancellationTokenSource?.Dispose();
cancellationTokenSource = null;
Logger.Information("Finished Test Tearing Down");
}

Expand Down Expand Up @@ -97,5 +103,11 @@ public static string GetTempTentacleLogPath()
{
return Path.Combine(TestContext.CurrentContext.TestDirectory, TestContext.CurrentContext.Test.ID + ".tentaclelog");
}

public void Dispose()
{
Try.CatchingError(() => cancellationTokenSource?.Cancel(), _ => {});
Try.CatchingError(() => cancellationTokenSource?.Dispose(), _ => {});
}
}
}
34 changes: 32 additions & 2 deletions source/Octopus.Tentacle.Tests.Integration/Util/Wait.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,22 @@ namespace Octopus.Tentacle.Tests.Integration.Util
{
public static class Wait
{
public static async Task For(Func<bool> toBeTrue, CancellationToken cancellationToken)
public static async Task For(Func<bool> toBeTrue, TimeSpan timeout, Action onTimeoutOrCancellation, CancellationToken cancellationToken)
{
using var timeoutCancellationTokenSource = new CancellationTokenSource(timeout);
using var linkedCancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCancellationTokenSource.Token);

try
{
await For(toBeTrue, linkedCancellationTokenSource.Token);
}
catch (OperationCanceledException) when (linkedCancellationTokenSource.IsCancellationRequested)
{
onTimeoutOrCancellation();
}
}

static async Task For(Func<bool> toBeTrue, CancellationToken cancellationToken)
{
while (true)
{
Expand All @@ -19,7 +34,22 @@ public static async Task For(Func<bool> toBeTrue, CancellationToken cancellation
}
}

public static async Task For(Func<Task<bool>> toBeTrue, CancellationToken cancellationToken)
public static async Task For(Func<Task<bool>> toBeTrue, TimeSpan timeout, Action onTimeoutOrCancellation, CancellationToken cancellationToken)
{
using var timeoutCancellationTokenSource = new CancellationTokenSource(timeout);
using var linkedCancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCancellationTokenSource.Token);

try
{
await For(toBeTrue, linkedCancellationTokenSource.Token);
}
catch (OperationCanceledException) when (linkedCancellationTokenSource.IsCancellationRequested)
{
onTimeoutOrCancellation();
}
}

static async Task For(Func<Task<bool>> toBeTrue, CancellationToken cancellationToken)
{
while (true)
{
Expand Down
22 changes: 16 additions & 6 deletions source/Octopus.Tentacle.Tests.Integration/WorkspaceCleanerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
using FluentAssertions;
using NUnit.Framework;
using Octopus.Tentacle.CommonTestUtils.Builders;
using Octopus.Tentacle.Contracts;
using Octopus.Tentacle.Contracts.ClientServices;
using Octopus.Tentacle.Scripts;
using Octopus.Tentacle.Tests.Integration.Support;
using Octopus.Tentacle.Tests.Integration.Support.ExtensionMethods;
using Octopus.Tentacle.Tests.Integration.Util;
using Octopus.Tentacle.Tests.Integration.Util.Builders;
using Octopus.Tentacle.Tests.Integration.Util.Builders.Decorators;
Expand Down Expand Up @@ -41,12 +39,18 @@ public async Task WhenScriptServiceIsRunning_ThenWorkspaceIsNotDeleted(TentacleC

// Start task
var runningScriptTask = clientAndTentacle.TentacleClient.ExecuteScript(startScriptCommand, CancellationToken, null, new InMemoryLog());
await Wait.For(() => Directory.Exists(startScriptWorkspaceDirectory), CancellationToken);
await Wait.For(() => Directory.Exists(startScriptWorkspaceDirectory),
TimeSpan.FromSeconds(60),
() => throw new Exception("Workspace directory did not get created"),
CancellationToken);

// Ensure Workspace Cleaning Has Run
var existingWorkspaceDirectory = GivenExistingWorkspaceExists(existingHomeDirectory);
File.WriteAllText(ScriptWorkspace.GetLogFilePath(existingWorkspaceDirectory), "Existing log file");
await Wait.For(() => !Directory.Exists(existingWorkspaceDirectory), CancellationToken);
await Wait.For(() => !Directory.Exists(existingWorkspaceDirectory),
TimeSpan.FromSeconds(60),
() => throw new Exception("Workspace directory did not get deleted"),
CancellationToken);

Directory.Exists(startScriptWorkspaceDirectory).Should().BeTrue("Workspace should not have been cleaned up");

Expand Down Expand Up @@ -82,7 +86,10 @@ await AssertionExtensions

var workspaceDirectory = GetWorkspaceDirectoryPath(clientAndTentacle.RunningTentacle.HomeDirectory, startScriptCommand.ScriptTicket.TaskId);

await Wait.For(() => !Directory.Exists(workspaceDirectory), CancellationToken);
await Wait.For(() => !Directory.Exists(workspaceDirectory),
TimeSpan.FromSeconds(60),
() => throw new Exception("Workspace directory did not get deleted"),
CancellationToken);
}

[Test]
Expand All @@ -106,7 +113,10 @@ public async Task WhenTentacleStarts_WithWorkspacesOlderThanThreshold_ThenWorksp
})
.Build(CancellationToken);

await Wait.For(() => !Directory.Exists(existingWorkspaceDirectoryWithLogFile), CancellationToken);
await Wait.For(() => !Directory.Exists(existingWorkspaceDirectoryWithLogFile),
TimeSpan.FromSeconds(60),
() => throw new Exception("Workspace directory did not get deleted"),
CancellationToken);
Directory.Exists(existingWorkspaceDirectoryWithoutLogFile).Should().BeTrue();
}

Expand Down

0 comments on commit da5e475

Please sign in to comment.