Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 0 additions & 124 deletions pkg/agent/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,135 +10,11 @@ import (
"path/filepath"
"runtime"
"strings"
"time"

devpodhttp "github.com/skevetter/devpod/pkg/http"
"github.com/skevetter/log"
)

// RetryConfig controls the retry loop in RetryWithDeadline. Zero or
// negative fields are replaced with defaults by applyDefaults.
type RetryConfig struct {
	MaxAttempts  int           // maximum calls to the retry func (defaulted to 1)
	InitialDelay time.Duration // delay before the first retry (defaulted to 1s)
	MaxDelay     time.Duration // cap on the doubling backoff delay (defaulted to 30s)
	Deadline     time.Time     // absolute cutoff; the zero value means no deadline
}

type RetryFunc func(attempt int) error

// RetryWithDeadline invokes fn up to cfg.MaxAttempts times, sleeping a
// doubling backoff (capped at cfg.MaxDelay, clamped to the remaining
// deadline) between failures. It returns nil as soon as fn succeeds, the
// context error if ctx is cancelled, and a wrapped ErrInjectTimeout if
// cfg.Deadline passes between attempts.
func RetryWithDeadline(
	ctx context.Context,
	log log.Logger,
	cfg RetryConfig,
	fn RetryFunc,
) error {
	cfg.applyDefaults()
	delay := cfg.InitialDelay

	for attempt := 1; attempt <= cfg.MaxAttempts; attempt++ {
		// Abort before calling fn when the deadline has passed or ctx is done;
		// attempt-1 is the number of attempts actually completed so far.
		if err := cfg.checkPreConditions(ctx, attempt-1); err != nil {
			return err
		}

		err := fn(attempt)
		if err == nil {
			return nil
		}

		// Last attempt: wrap the final error instead of sleeping again.
		if attempt == cfg.MaxAttempts {
			return fmt.Errorf("agent injection failed after %d attempts: %w", attempt, err)
		}

		// handleRetry sleeps, then returns the next backoff delay. It returns
		// 0 as a sentinel meaning the sleep was interrupted by ctx cancellation.
		delay = cfg.handleRetry(&retryContext{
			ctx:     ctx,
			log:     log,
			attempt: attempt,
			err:     err,
			delay:   delay,
		})
		if delay == 0 {
			return ctx.Err()
		}
	}

	// Unreachable: the loop always returns on the final attempt, but the
	// compiler requires a terminating return here.
	return fmt.Errorf("retry loop exited unexpectedly")
}

// checkPreConditions reports whether the retry loop may proceed: the
// configured deadline (checked first) must not have passed and ctx must
// still be live. attemptsCompleted is folded into the deadline error.
func (cfg *RetryConfig) checkPreConditions(ctx context.Context, attemptsCompleted int) error {
	err := cfg.checkDeadline(attemptsCompleted)
	if err == nil {
		err = checkContextCancelled(ctx)
	}
	return err
}

// retryContext bundles the per-attempt state passed to handleRetry.
type retryContext struct {
	ctx     context.Context
	log     log.Logger
	attempt int           // 1-based number of the attempt that just failed
	err     error         // error returned by that attempt
	delay   time.Duration // current backoff delay before the next retry
}

// handleRetry logs the failed attempt, sleeps for the deadline-clamped
// backoff, and returns the doubled delay capped at cfg.MaxDelay. A return
// value of 0 is a sentinel meaning the sleep was interrupted by context
// cancellation; the caller maps it to rctx.ctx.Err().
func (cfg *RetryConfig) handleRetry(rctx *retryContext) time.Duration {
	sleep := calculateSleep(rctx.delay, cfg)

	rctx.log.Debugf("retrying attempt %d after %v: %v", rctx.attempt, sleep, rctx.err)

	if err := sleepWithContext(rctx.ctx, sleep); err != nil {
		return 0
	}

	// Double the delay for the next attempt, bounded by MaxDelay.
	newDelay := rctx.delay * 2
	return min(newDelay, cfg.MaxDelay)
}

// applyDefaults replaces zero or negative config fields with sane
// defaults: 1 attempt, 1s initial delay, 30s maximum delay.
func (cfg *RetryConfig) applyDefaults() {
	if cfg.MaxAttempts < 1 {
		cfg.MaxAttempts = 1
	}
	if cfg.InitialDelay < 1 {
		cfg.InitialDelay = time.Second
	}
	if cfg.MaxDelay < 1 {
		cfg.MaxDelay = 30 * time.Second
	}
}

// checkDeadline returns a wrapped ErrInjectTimeout once the configured
// deadline has passed; a zero deadline never expires.
func (cfg *RetryConfig) checkDeadline(attemptsCompleted int) error {
	if !cfg.Deadline.IsZero() && time.Now().After(cfg.Deadline) {
		return fmt.Errorf("%w after %d attempts", ErrInjectTimeout, attemptsCompleted)
	}
	return nil
}

func checkContextCancelled(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
return nil
}
}

// calculateSleep clamps delay so it never overshoots cfg.Deadline; with a
// zero deadline (or one already passed), delay is returned unchanged.
func calculateSleep(delay time.Duration, cfg *RetryConfig) time.Duration {
	if cfg.Deadline.IsZero() {
		return delay
	}
	if remaining := time.Until(cfg.Deadline); remaining > 0 && delay > remaining {
		return remaining
	}
	return delay
}

func sleepWithContext(ctx context.Context, duration time.Duration) error {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(duration):
return nil
}
}

type BinarySource interface {
GetBinary(ctx context.Context, arch string) (io.ReadCloser, error)
SourceName() string
Expand Down
47 changes: 28 additions & 19 deletions pkg/agent/inject.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import (
"github.com/skevetter/devpod/pkg/shell"
"github.com/skevetter/devpod/pkg/version"
"github.com/skevetter/log"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"
)

var (
Expand Down Expand Up @@ -185,25 +187,32 @@ func InjectAgent(opts *InjectOptions) error {

vc := newVersionChecker(opts)
bm := NewBinaryManager(opts.Log, opts.DownloadURL)
return RetryWithDeadline(
opts.Ctx,
opts.Log,
RetryConfig{
MaxAttempts: 30,
InitialDelay: 10 * time.Second,
MaxDelay: 60 * time.Second,
Deadline: time.Now().Add(opts.Timeout),
},
func(attempt int) error {
return injectAgent(&injectContext{
attempt: attempt,
opts: opts,
bm: bm,
vc: vc,
metrics: metrics,
})
},
)

backoff := wait.Backoff{
Steps: 30,
Duration: 10 * time.Second,
Factor: 1.5,
Jitter: 0.1,
Cap: 60 * time.Second,
}

attempt := 0
return retry.OnError(backoff, func(err error) bool {
if opts.Ctx.Err() != nil {
return false
}
opts.Log.Debugf("retrying attempt %d: %v", attempt, err)
return true
}, func() error {
attempt++
return injectAgent(&injectContext{
attempt: attempt,
opts: opts,
bm: bm,
vc: vc,
metrics: metrics,
})
})
Comment on lines +191 to +215
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, examine the file to understand the full context
head -n 30 pkg/agent/inject.go | tail -n +15

Repository: skevetter/devpod

Length of output: 488


🏁 Script executed:

# Check the imports to confirm which retry package is used
grep -n "import" pkg/agent/inject.go | head -n 20

Repository: skevetter/devpod

Length of output: 70


🏁 Script executed:

# Get the exact lines in question with more context
sed -n '180,220p' pkg/agent/inject.go

Repository: skevetter/devpod

Length of output: 1037


🏁 Script executed:

# Search for InjectOptions struct definition to understand Timeout field
rg "type InjectOptions" -A 10

Repository: skevetter/devpod

Length of output: 1150


🌐 Web query:

client-go v0.35.0 retry.OnError context handling deadline

💡 Result:

In client-go v0.35.0, the k8s.io/client-go/util/retry package does not provide retry.OnError (it only exposes conflict-focused helpers like RetryOnConflict). (pkg.go.dev)

So there is no context/deadline handling for retry.OnError in client-go v0.35.0, because that function isn’t part of that module/version.

If your goal is “retry but stop when ctx is canceled / its deadline expires”, use wait.ExponentialBackoffWithContext(ctx, backoff, condition), which explicitly returns when the context is canceled or hits its deadline. (pkg.go.dev)

Citations:


🏁 Script executed:

# Check go.mod for the client-go version
grep "client-go" go.mod

Repository: skevetter/devpod

Length of output: 141


🏁 Script executed:

# Search for the retry.OnError function definition
rg "func OnError" --type go

Repository: skevetter/devpod

Length of output: 42


🏁 Script executed:

# Check if wait.ExponentialBackoffWithContext is available
rg "ExponentialBackoffWithContext" --type go

Repository: skevetter/devpod

Length of output: 42


🏁 Script executed:

# Look for any custom retry definitions in the codebase
rg "package retry" --type go

Repository: skevetter/devpod

Length of output: 42


🏁 Script executed:

# Check the full imports section of inject.go
sed -n '1,40p' pkg/agent/inject.go

Repository: skevetter/devpod

Length of output: 995


🏁 Script executed:

# Check if the code compiles or if there are any build errors documented
find . -name "go.sum" -exec grep "client-go" {} \;

Repository: skevetter/devpod

Length of output: 401


🏁 Script executed:

# Try to find if OnError is defined anywhere in client-go imports or locally
rg "OnError" --type go

Repository: skevetter/devpod

Length of output: 597


🏁 Script executed:

# Check the k8s.io client-go package documentation for the actual exports
# First, see if there's any reference to what retry functions are available
rg "retry\." --type go | head -20

Repository: skevetter/devpod

Length of output: 456


🌐 Web query:

k8s.io/client-go/util/retry OnError function signature v0.35.0

💡 Result:

In k8s.io/client-go/util/retry v0.35.0, OnError has this signature:

func OnError(backoff wait.Backoff, errorFunc func(error) bool, fn func() error) error

(Some docs/name variants call the 2nd arg retriable, but the type/signature is the same.) [1][2]


🌐 Web query:

k8s.io/apimachinery/pkg/util/wait ExponentialBackoff context support

💡 Result:

wait.ExponentialBackoff does not take a context.Context (it uses ConditionFunc func() (bool, error)), so it can’t be directly canceled via context. (pkg.go.dev)

For context cancellation/deadlines, use wait.ExponentialBackoffWithContext:

  • Signature: func ExponentialBackoffWithContext(ctx context.Context, backoff Backoff, condition ConditionWithContextFunc) error (pkg.go.dev)
  • It returns early if ctx is canceled / hits deadline, if the condition returns an error, or when attempts are exhausted (ErrWaitTimeout). (pkg.go.dev)

Example:

err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
    Duration: 100 * time.Millisecond,
    Factor:   2.0,
    Jitter:   0.1,
    Steps:    6,
    Cap:      2 * time.Second,
}, func(ctx context.Context) (bool, error) {
    // check condition; honor ctx if you do I/O
    return done, err
})

Citations:


🏁 Script executed:

# Check InjectOptions struct and how opts is used
sed -n '30,90p' pkg/agent/inject.go

Repository: skevetter/devpod

Length of output: 2159


🏁 Script executed:

# Verify that InjectOptions is passed by pointer in injectAgent
rg "func injectAgent" -A 5

Repository: skevetter/devpod

Length of output: 323


🏁 Script executed:

# Check if there are any other references to opts fields in the retry block that might be affected
sed -n '191,215p' pkg/agent/inject.go

Repository: skevetter/devpod

Length of output: 558


Overall retry can exceed InjectOptions.Timeout.

Timeout is documented as the maximum duration to wait for injection completion, but the retry loop runs for a fixed 30 steps regardless of elapsed time. The opts.Ctx.Err() check in the callback is insufficient because context expiration is only checked between retries, not during the backoff delays themselves. With the current backoff configuration (~27 minutes total sleep), injection can run far longer than the documented timeout (default 5 minutes). Bind retries to a timeout-aware context so that the overall time is actually capped by the timeout:

🛠️ Suggested fix
+	retryCtx := opts.Ctx
+	if opts.Timeout > 0 {
+		var cancel context.CancelFunc
+		retryCtx, cancel = context.WithTimeout(opts.Ctx, opts.Timeout)
+		defer cancel()
+	}
+	retryOpts := *opts
+	retryOpts.Ctx = retryCtx
+
 	backoff := wait.Backoff{
 		Steps:    30,
 		Duration: 10 * time.Second,
 		Factor:   1.5,
 		Jitter:   0.1,
 		Cap:      60 * time.Second,
 	}

 	attempt := 0
 	return retry.OnError(backoff, func(err error) bool {
-		if opts.Ctx.Err() != nil {
+		if retryCtx.Err() != nil {
 			return false
 		}
 		opts.Log.Debugf("retrying attempt %d: %v", attempt, err)
 		return true
 	}, func() error {
 		attempt++
 		return injectAgent(&injectContext{
 			attempt: attempt,
-			opts:    opts,
+			opts:    &retryOpts,
 			bm:      bm,
 			vc:      vc,
 			metrics: metrics,
 		})
 	})
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
backoff := wait.Backoff{
Steps: 30,
Duration: 10 * time.Second,
Factor: 1.5,
Jitter: 0.1,
Cap: 60 * time.Second,
}
attempt := 0
return retry.OnError(backoff, func(err error) bool {
if opts.Ctx.Err() != nil {
return false
}
opts.Log.Debugf("retrying attempt %d: %v", attempt, err)
return true
}, func() error {
attempt++
return injectAgent(&injectContext{
attempt: attempt,
opts: opts,
bm: bm,
vc: vc,
metrics: metrics,
})
})
retryCtx := opts.Ctx
if opts.Timeout > 0 {
var cancel context.CancelFunc
retryCtx, cancel = context.WithTimeout(opts.Ctx, opts.Timeout)
defer cancel()
}
retryOpts := *opts
retryOpts.Ctx = retryCtx
backoff := wait.Backoff{
Steps: 30,
Duration: 10 * time.Second,
Factor: 1.5,
Jitter: 0.1,
Cap: 60 * time.Second,
}
attempt := 0
return retry.OnError(backoff, func(err error) bool {
if retryCtx.Err() != nil {
return false
}
opts.Log.Debugf("retrying attempt %d: %v", attempt, err)
return true
}, func() error {
attempt++
return injectAgent(&injectContext{
attempt: attempt,
opts: &retryOpts,
bm: bm,
vc: vc,
metrics: metrics,
})
})
🤖 Prompt for AI Agents
In `@pkg/agent/inject.go` around lines 191 - 215, The retry loop can exceed
InjectOptions.Timeout because backoff sleeps aren't context-aware; create a
timeout-aware child context (ctx, cancel := context.WithTimeout(opts.Ctx,
opts.Timeout) and defer cancel()) and use a context-aware backoff call instead
of the current retry.OnError pattern: replace the retry.OnError block with a
wait.ExponentialBackoffWithContext (or equivalent context-aware backoff) that
uses the new ctx and invokes injectAgent(&injectContext{attempt: attempt, opts:
optsWithCtx, bm: bm, vc: vc, metrics: metrics}) where optsWithCtx is opts with
Ctx set to the new ctx; also ensure the retry predicate and any opts.Ctx.Err()
checks use this ctx so the overall operation is bounded by
InjectOptions.Timeout.

}

func injectLocally(opts *InjectOptions) error {
Expand Down
Loading