Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
9073427
Tentacle script abandonment (re-landed on main)
jimmyp Jun 2, 2026
4f3fc4d
Address review: remove abandon capability check, drop abandon metrics…
jimmyp Jun 2, 2026
e797a67
Apply review: concise AbandonScript summary on ITentacleClient
jimmyp Jun 2, 2026
06517d6
Address review: V1/K8s abandon throws + orchestrator is abandon-aware…
jimmyp Jun 2, 2026
ffa5b82
Test at the right boundary: drop duplicate/wrong-level abandon tests
jimmyp Jun 2, 2026
d03dcfb
Add AbandonScriptAsync to IAsyncScriptServiceV2 (unblocks server test…
jimmyp Jun 2, 2026
65a6db5
Fix race in abandon integration tests: assert ExecuteScript's result,…
jimmyp Jun 3, 2026
96babdd
Reword abandon wait/cleanup comments in SilentProcessRunner
jimmyp Jun 4, 2026
23059e4
Address PR review feedback on abandon
jimmyp Jun 4, 2026
7ee0fdb
Keep grandchild cleanup best-effort (can't assert a reparented non-ch…
jimmyp Jun 4, 2026
86ef03a
Gate abandon on the advertised capability, not just "is V2", + test r…
jimmyp Jun 5, 2026
d4e85ba
Restore @jimmyp's applied suggestions clobbered by my force-push
jimmyp Jun 5, 2026
5fd82a5
Put SupportsAbandon on ScriptServiceVersion (ScriptServiceVersion2Wit…
jimmyp Jun 5, 2026
21b8d2a
Address review: ScriptServiceVersion.IsV2, split escalation test, V1 …
jimmyp Jun 5, 2026
d195d65
Make the escalation split meaningful: each test asserts a distinct ef…
jimmyp Jun 5, 2026
a2e283b
Merge remote-tracking branch 'origin/main' into jimpelletier/eft-3295…
jimmyp Jun 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

477 changes: 477 additions & 0 deletions docs/superpowers/specs/2026-05-21-tentacle-script-abandon-design.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using FluentAssertions;
using Halibut.ServiceModel;
using NSubstitute;
using NUnit.Framework;
using Octopus.Tentacle.Client.Execution;
using Octopus.Tentacle.Client.Observability;
using Octopus.Tentacle.Client.Retries;
using Octopus.Tentacle.Client.Scripts;
using Octopus.Tentacle.Contracts;
using Octopus.Tentacle.Contracts.Capabilities;
using Octopus.Tentacle.Contracts.ClientServices;
using Octopus.Tentacle.Contracts.Logging;
using Octopus.Tentacle.Contracts.Observability;
using Octopus.Tentacle.Contracts.ScriptServiceV2;

namespace Octopus.Tentacle.Client.Tests
{
[TestFixture]
public class ScriptServiceVersionSelectorTests
{
    // Each test feeds a fixed capability list through the selector and checks both halves of the
    // returned tuple: the chosen script service version and whether abandon was advertised.

    [Test]
    public async Task WhenTheTentacleAdvertisesAbandon_SupportsAbandonIsTrue()
    {
        var selection = await SelectFor(
            nameof(IScriptService),
            nameof(IFileTransferService),
            nameof(IScriptServiceV2),
            nameof(IAsyncClientScriptServiceV2.AbandonScriptAsync));

        selection.Version.Should().Be(ScriptServiceVersion.ScriptServiceVersion2);
        selection.SupportsAbandon.Should().BeTrue();
    }

    [Test]
    public async Task WhenAnOlderV2TentacleDoesNotAdvertiseAbandon_SupportsAbandonIsFalse()
    {
        // A V2 Tentacle that predates the abandon verb still selects V2, but must not report abandon
        // support: the gate is the advertised capability, not merely "is V2".
        var selection = await SelectFor(
            nameof(IScriptService),
            nameof(IFileTransferService),
            nameof(IScriptServiceV2));

        selection.Version.Should().Be(ScriptServiceVersion.ScriptServiceVersion2);
        selection.SupportsAbandon.Should().BeFalse();
    }

    [Test]
    public async Task WhenTheTentacleOnlyHasV1_SupportsAbandonIsFalse()
    {
        var selection = await SelectFor(
            nameof(IScriptService),
            nameof(IFileTransferService));

        selection.Version.Should().Be(ScriptServiceVersion.ScriptServiceVersion1);
        selection.SupportsAbandon.Should().BeFalse();
    }

    /// <summary>
    /// Runs a <see cref="ScriptServiceVersionSelector"/> against a substituted capabilities service that
    /// reports exactly the supplied capability names, with retries disabled.
    /// </summary>
    /// <param name="capabilities">The capability names the fake Tentacle advertises</param>
    /// <returns>The selected script service version and whether abandon is supported</returns>
    static async Task<(ScriptServiceVersion Version, bool SupportsAbandon)> SelectFor(params string[] capabilities)
    {
        var advertised = new CapabilitiesResponseV2(new List<string>(capabilities));

        var capabilitiesService = Substitute.For<IAsyncClientCapabilitiesServiceV2>();
        capabilitiesService
            .GetCapabilitiesAsync(Arg.Any<HalibutProxyRequestOptions>())
            .Returns(Task.FromResult(advertised));

        var taskLog = Substitute.For<ITentacleClientTaskLog>();
        var rpcCallExecutor = RpcCallExecutorFactory.Create(TimeSpan.Zero, Substitute.For<ITentacleClientObserver>());
        var retriesDisabled = new RpcRetrySettings(RetriesEnabled: false, RetryDuration: TimeSpan.Zero);
        var clientOptions = new TentacleClientOptions(retriesDisabled);
        var metricsBuilder = ClientOperationMetricsBuilder.Start();

        var selector = new ScriptServiceVersionSelector(
            capabilitiesService,
            taskLog,
            rpcCallExecutor,
            clientOptions,
            metricsBuilder);

        return await selector.DetermineScriptServiceVersionToUse(CancellationToken.None);
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.Linq;
using Octopus.Tentacle.Contracts.Capabilities;
using Octopus.Tentacle.Contracts.ClientServices;
using Octopus.Tentacle.Contracts.ScriptServiceV2;

namespace Octopus.Tentacle.Client.Capabilities
Expand All @@ -15,5 +16,17 @@ public static bool HasScriptServiceV2(this CapabilitiesResponseV2 capabilities)

return capabilities.SupportedCapabilities.Contains(nameof(IScriptServiceV2));
}

/// <summary>
/// Reports whether the capabilities response lists the AbandonScript capability.
/// </summary>
/// <param name="capabilities">The capabilities response to inspect; may be null</param>
/// <returns>
/// True only when the supported-capabilities list contains the abandon capability name;
/// false when the response or its list is null or empty.
/// </returns>
public static bool HasAbandonScript(this CapabilitiesResponseV2 capabilities)
{
    var supported = capabilities?.SupportedCapabilities;
    if (supported is null)
    {
        return false;
    }

    // The capability name is taken via nameof on AbandonScriptAsync (both sides do the same), so the
    // strings match and a rename on either side can't silently drift the capability check.
    // An empty list simply contains nothing, so it falls through to false here.
    return supported.Contains(nameof(IAsyncClientScriptServiceV2.AbandonScriptAsync));
}
}
}
9 changes: 8 additions & 1 deletion source/Octopus.Tentacle.Client/EventDriven/CommandContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,22 @@ public class CommandContext
{
public CommandContext(ScriptTicket scriptTicket,
long nextLogSequence,
ScriptServiceVersion scripServiceVersionUsed)
ScriptServiceVersion scripServiceVersionUsed,
bool supportsAbandon = false)
{
ScriptTicket = scriptTicket;
NextLogSequence = nextLogSequence;
ScripServiceVersionUsed = scripServiceVersionUsed;
SupportsAbandon = supportsAbandon;
}

public ScriptTicket ScriptTicket { get; }
public long NextLogSequence { get; }
public ScriptServiceVersion ScripServiceVersionUsed { get; }

// Whether the Tentacle advertised the AbandonScript capability. Determined once from
// capabilities when the script starts and carried on the start result so the orchestrator
// can gate abandon escalation on it.
public bool SupportsAbandon { get; }
}
}
14 changes: 13 additions & 1 deletion source/Octopus.Tentacle.Client/ITentacleClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using Octopus.Tentacle.Client.Scripts.Models;
using Octopus.Tentacle.Contracts;
using Octopus.Tentacle.Contracts.Logging;
using Octopus.Tentacle.Contracts.ScriptServiceV2;

namespace Octopus.Tentacle.Client
{
Expand All @@ -31,7 +32,8 @@ Task<ScriptExecutionResult> ExecuteScript(
OnScriptStatusResponseReceived onScriptStatusResponseReceived,
OnScriptCompleted onScriptCompleted,
ITentacleClientTaskLog logger,
CancellationToken scriptExecutionCancellationToken);
CancellationToken scriptExecutionCancellationToken,
TimeSpan? abandonAfterCancellationPendingFor = null);

/// <summary>
/// Start the script.
Expand Down Expand Up @@ -59,6 +61,16 @@ Task<ScriptOperationExecutionResult> StartScript(ExecuteScriptCommand command,
/// <returns>The result, which includes the CommandContext for the next command</returns>
Task<ScriptOperationExecutionResult> CancelScript(CommandContext commandContext, ITentacleClientTaskLog logger);

/// <summary>
/// Abandon a running script. Cancellation is attempted, but if necessary the script is left
/// running in the OS with Tentacle no longer watching or managing it.
/// </summary>
/// <param name="scriptTicket">The ticket of the script to abandon</param>
/// <param name="logger">Used to output user orientated log messages</param>
/// <param name="cancellationToken">Cancels the RPC call</param>
/// <returns>The current status snapshot of the script at the time abandon was processed</returns>
Task<ScriptStatusResponseV2> AbandonScript(ScriptTicket scriptTicket, ITentacleClientTaskLog logger, CancellationToken cancellationToken);

/// <summary>
/// Complete the script.
/// </summary>
Expand Down
22 changes: 18 additions & 4 deletions source/Octopus.Tentacle.Client/ScriptExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,18 @@ public async Task<ScriptOperationExecutionResult> StartScript(ExecuteScriptComma
{
// Note: This class deliberately does not create OpenTelemetry Trace activities.
// It is a facade over other ScriptExecutor services, and the facade doesn't do anything interesting
var scriptServiceVersionToUse = await DetermineScriptServiceVersionToUse(cancellationToken);
var (scriptServiceVersionToUse, supportsAbandon) = await DetermineScriptServiceVersionToUse(cancellationToken);

var scriptExecutorFactory = CreateScriptExecutorFactory();
var scriptExecutor = scriptExecutorFactory.CreateScriptExecutor(scriptServiceVersionToUse);

return await scriptExecutor.StartScript(executeScriptCommand, startScriptIsBeingReAttempted, cancellationToken);
var result = await scriptExecutor.StartScript(executeScriptCommand, startScriptIsBeingReAttempted, cancellationToken);

// Carry whether the Tentacle advertised abandon onto the context the orchestrator reads, so it can
// gate escalation on the real capability rather than just "is V2".
var context = result.ContextForNextCommand;
var contextWithAbandon = new CommandContext(context.ScriptTicket, context.NextLogSequence, context.ScripServiceVersionUsed, supportsAbandon);
return new ScriptOperationExecutionResult(result.ScriptStatus, contextWithAbandon);
}

public async Task<ScriptOperationExecutionResult> GetStatus(CommandContext commandContext, CancellationToken cancellationToken)
Expand All @@ -69,7 +75,15 @@ public async Task<ScriptOperationExecutionResult> CancelScript(CommandContext co

return await scriptExecutor.CancelScript(commandContext);
}


public async Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    // Facade only: pick the executor matching the script service version recorded on the context
    // and hand the abandon request straight to it.
    var executorForVersion = CreateScriptExecutorFactory()
        .CreateScriptExecutor(commandContext.ScripServiceVersionUsed);

    var abandonResult = await executorForVersion.AbandonScript(commandContext);
    return abandonResult;
}

public async Task<ScriptStatus?> CompleteScript(CommandContext commandContext, CancellationToken cancellationToken)
{
var scriptExecutorFactory = CreateScriptExecutorFactory();
Expand All @@ -88,7 +102,7 @@ ScriptExecutorFactory CreateScriptExecutorFactory()
logger);
}

async Task<ScriptServiceVersion> DetermineScriptServiceVersionToUse(CancellationToken cancellationToken)
async Task<(ScriptServiceVersion Version, bool SupportsAbandon)> DetermineScriptServiceVersionToUse(CancellationToken cancellationToken)
{
try
{
Expand Down
7 changes: 7 additions & 0 deletions source/Octopus.Tentacle.Client/Scripts/IScriptExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ Task<ScriptOperationExecutionResult> StartScript(ExecuteScriptCommand command,
/// <returns>The result, which includes the CommandContext for the next command</returns>
Task<ScriptOperationExecutionResult> CancelScript(CommandContext commandContext);

/// <summary>
/// Abandon the script. Signals Tentacle to stop waiting for the script to cancel and makes Tentacle
/// available to run more scripts with the same isolation mutex.
/// </summary>
/// <remarks>
/// Executors for script services that have no abandon verb (ScriptServiceV1, KubernetesScriptServiceV1)
/// throw NotSupportedException instead.
/// </remarks>
/// <param name="commandContext">The CommandContext from the previous command</param>
/// <returns>The result, which includes the CommandContext for the next command</returns>
Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext);

/// <summary>
/// Complete the script.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,12 @@ async Task<KubernetesScriptStatusResponseV1> CancelScriptAction(CancellationToke
return Map(kubernetesScriptStatusResponseV1);
}

/// <summary>
/// Not supported: KubernetesScriptServiceV1 has no abandon verb.
/// </summary>
/// <param name="commandContext">The CommandContext from the previous command (unused)</param>
public Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    // Escalation to abandon is gated on the Tentacle advertising the abandon capability, which
    // K8s agents never do, so the orchestrator should not call this; arriving here is a bug.
    // The exception is thrown synchronously rather than returned as a faulted Task.
    throw new NotSupportedException("KubernetesScriptServiceV1 cannot abandon a script; it has no abandon verb. Cancel the script instead.");
}

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(KubernetesScriptServiceV1Executor)}.{nameof(CompleteScript)}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,20 @@ sealed class ObservingScriptOrchestrator
readonly OnScriptStatusResponseReceived onScriptStatusResponseReceived;
readonly OnScriptCompleted onScriptCompleted;
readonly IScriptExecutor scriptExecutor;
readonly TimeSpan? abandonAfterCancellationPendingFor;

public ObservingScriptOrchestrator(
IScriptObserverBackoffStrategy scriptObserverBackOffStrategy,
OnScriptStatusResponseReceived onScriptStatusResponseReceived,
OnScriptCompleted onScriptCompleted,
IScriptExecutor scriptExecutor)
IScriptExecutor scriptExecutor,
TimeSpan? abandonAfterCancellationPendingFor = null)
{
this.scriptExecutor = scriptExecutor;
this.scriptObserverBackOffStrategy = scriptObserverBackOffStrategy;
this.onScriptStatusResponseReceived = onScriptStatusResponseReceived;
this.onScriptCompleted = onScriptCompleted;
this.abandonAfterCancellationPendingFor = abandonAfterCancellationPendingFor;
}

public async Task<ScriptExecutionResult> ExecuteScript(ExecuteScriptCommand command, CancellationToken scriptExecutionCancellationToken)
Expand Down Expand Up @@ -80,12 +83,29 @@ async Task<ScriptOperationExecutionResult> ObserveUntilComplete(
var iteration = 0;
var cancellationIteration = 0;
var lastResult = startScriptResult;
var supportsAbandon = startScriptResult.ContextForNextCommand.SupportsAbandon;
var stopwatch = new Stopwatch();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: rename to something like cancelling duration


while (lastResult.ScriptStatus.State != ProcessState.Complete)
{
if (scriptExecutionCancellationToken.IsCancellationRequested)
{
lastResult = await scriptExecutor.CancelScript(lastResult.ContextForNextCommand).ConfigureAwait(false);
// Record when cancellation first fired so we can escalate to abandon after the threshold.
if (!stopwatch.IsRunning)
{
stopwatch.Start();
}
Comment thread
jimmyp marked this conversation as resolved.

// Only escalate to abandon when the Tentacle advertised the abandon capability. Old V2
// Tentacles (pre-abandon) and V1/Kubernetes don't, so we keep cancelling rather than
// calling a verb they don't have.
var shouldAbandon = supportsAbandon
&& abandonAfterCancellationPendingFor.HasValue
&& stopwatch.Elapsed >= abandonAfterCancellationPendingFor.Value;

lastResult = shouldAbandon
? await scriptExecutor.AbandonScript(lastResult.ContextForNextCommand).ConfigureAwait(false)
: await scriptExecutor.CancelScript(lastResult.ContextForNextCommand).ConfigureAwait(false);
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ public async Task<ScriptOperationExecutionResult> CancelScript(CommandContext co
return Map(response);
}

/// <summary>
/// Not supported: ScriptServiceV1 has no abandon verb.
/// </summary>
/// <param name="commandContext">The CommandContext from the previous command (unused)</param>
public Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    // The orchestrator only escalates to abandon when the Tentacle advertised the abandon
    // capability, so it should never escalate for V1; arriving here is a bug.
    // The exception is thrown synchronously rather than returned as a faulted Task.
    throw new NotSupportedException("ScriptServiceV1 cannot abandon a script; it has no abandon verb. Cancel the script instead.");
}

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Threading.Tasks;
using Halibut;
using Halibut.ServiceModel;
using Octopus.Tentacle.Client.Capabilities;
using Octopus.Tentacle.Client.EventDriven;
using Octopus.Tentacle.Client.Execution;
using Octopus.Tentacle.Client.Observability;
Expand Down Expand Up @@ -173,6 +174,27 @@ async Task<ScriptStatusResponseV2> CancelScriptAction(CancellationToken ct)
return Map(scriptStatusResponseV2);
}

/// <summary>
/// Sends an AbandonScript request for the script identified by the supplied context and maps the
/// returned status into a result carrying the context for the next command.
/// </summary>
/// <param name="commandContext">The CommandContext from the previous command; supplies the ticket and next log sequence</param>
/// <returns>The mapped status response from Tentacle</returns>
public async Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(ScriptServiceV2Executor)}.{nameof(AbandonScript)}");

    // Built per attempt so every (re)try sends a fresh command and request options.
    async Task<ScriptStatusResponseV2> SendAbandon(CancellationToken ct)
    {
        var abandonCommand = new AbandonScriptCommandV2(commandContext.ScriptTicket, commandContext.NextLogSequence);
        var requestOptions = new HalibutProxyRequestOptions(ct);
        return await clientScriptServiceV2.AbandonScriptAsync(abandonCommand, requestOptions);
    }

    var rpcCall = RpcCall.Create<IScriptServiceV2>(nameof(IScriptServiceV2.AbandonScript));

    // Like CancelScript, abandon must not be cancelled — it stops the script on Tentacle — hence
    // CancellationToken.None rather than a caller-supplied token.
    var abandonResponse = await rpcCallExecutor.Execute(
        retriesEnabled: clientOptions.RpcRetrySettings.RetriesEnabled,
        rpcCall,
        SendAbandon,
        logger,
        clientOperationMetricsBuilder,
        CancellationToken.None).ConfigureAwait(false);

    return Map(abandonResponse);
}

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(ScriptServiceV2Executor)}.{nameof(CompleteScript)}");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace Octopus.Tentacle.Client.Scripts
namespace Octopus.Tentacle.Client.Scripts
{
public record ScriptServiceVersion(string Value)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ internal ScriptServiceVersionSelector(
this.clientOperationMetricsBuilder = clientOperationMetricsBuilder;
}

public async Task<ScriptServiceVersion> DetermineScriptServiceVersionToUse(CancellationToken cancellationToken)
public async Task<(ScriptServiceVersion Version, bool SupportsAbandon)> DetermineScriptServiceVersionToUse(CancellationToken cancellationToken)
{
logger.Verbose("Determining ScriptService version to use");

Expand All @@ -59,10 +59,13 @@ async Task<CapabilitiesResponseV2> GetCapabilitiesFunc(CancellationToken ct)
// It's implied (and tested) that GetCapabilities will only return Kubernetes or non-Kubernetes script services, never a mix
if (tentacleCapabilities.HasAnyKubernetesScriptService())
{
return DetermineKubernetesScriptServiceVersionToUse();
// Kubernetes agents never advertise abandon.
return (DetermineKubernetesScriptServiceVersionToUse(), false);
}

return DetermineShellScriptServiceVersionToUse(tentacleCapabilities);
// Abandon support is whether the Tentacle actually advertised the capability, not just that it
// is V2 — old V2 Tentacles (pre-abandon) are V2 but have no abandon verb.
return (DetermineShellScriptServiceVersionToUse(tentacleCapabilities), tentacleCapabilities.HasAbandonScript());
}

ScriptServiceVersion DetermineShellScriptServiceVersionToUse(CapabilitiesResponseV2 tentacleCapabilities)
Expand Down
Loading