From 42834f96b3125fda7ab253b9837115744f2889d5 Mon Sep 17 00:00:00 2001 From: Samuel K Date: Thu, 5 Feb 2026 21:03:24 +0000 Subject: [PATCH 1/2] refactor(agent): replace custom retry logic with Kubernetes backoff strategy Signed-off-by: Samuel K --- pkg/agent/binary.go | 123 -------------------------------------------- pkg/agent/inject.go | 47 ++++++++++------- 2 files changed, 28 insertions(+), 142 deletions(-) diff --git a/pkg/agent/binary.go b/pkg/agent/binary.go index 4bfdb9dbc..a2865f601 100644 --- a/pkg/agent/binary.go +++ b/pkg/agent/binary.go @@ -10,134 +10,11 @@ import ( "path/filepath" "runtime" "strings" - "time" devpodhttp "github.com/skevetter/devpod/pkg/http" "github.com/skevetter/log" ) -type RetryConfig struct { - MaxAttempts int - InitialDelay time.Duration - MaxDelay time.Duration - Deadline time.Time -} - -type RetryFunc func(attempt int) error - -func RetryWithDeadline( - ctx context.Context, - log log.Logger, - cfg RetryConfig, - fn RetryFunc, -) error { - cfg.applyDefaults() - delay := cfg.InitialDelay - - for attempt := 1; attempt <= cfg.MaxAttempts; attempt++ { - if err := cfg.checkPreConditions(ctx, attempt-1); err != nil { - return err - } - - err := fn(attempt) - if err == nil { - return nil - } - - if attempt == cfg.MaxAttempts { - return fmt.Errorf("agent injection failed after %d attempts: %w", attempt, err) - } - - delay = cfg.handleRetry(&retryContext{ - ctx: ctx, - log: log, - attempt: attempt, - err: err, - delay: delay, - }) - if delay == 0 { - return ctx.Err() - } - } - - return fmt.Errorf("retry loop exited unexpectedly") -} - -func (cfg *RetryConfig) checkPreConditions(ctx context.Context, attemptsCompleted int) error { - if err := cfg.checkDeadline(attemptsCompleted); err != nil { - return err - } - return checkContextCancelled(ctx) -} - -type retryContext struct { - ctx context.Context - log log.Logger - attempt int - err error - delay time.Duration -} - -func (cfg *RetryConfig) handleRetry(rctx *retryContext) time.Duration { - sleep := calculateSleep(rctx.delay, cfg) - - rctx.log.Debugf("retrying attempt %d after %v: %v", rctx.attempt, sleep, rctx.err) - - if err := sleepWithContext(rctx.ctx, sleep); err != nil { - return 0 - } - - newDelay := rctx.delay * 2 - return min(newDelay, cfg.MaxDelay) -} - -func (cfg *RetryConfig) applyDefaults() { - if cfg.MaxAttempts <= 0 { - cfg.MaxAttempts = 1 - } - if cfg.InitialDelay <= 0 { - cfg.InitialDelay = time.Second - } - if cfg.MaxDelay <= 0 { - cfg.MaxDelay = 30 * time.Second - } -} - -func (cfg *RetryConfig) checkDeadline(attemptsCompleted int) error { - if cfg.Deadline.IsZero() || !time.Now().After(cfg.Deadline) { - return nil - } - return fmt.Errorf("%w after %d attempts", ErrInjectTimeout, attemptsCompleted) -} - -func checkContextCancelled(ctx context.Context) error { - select { - case <-ctx.Done(): - return ctx.Err() - default: - return nil - } -} - -func calculateSleep(delay time.Duration, cfg *RetryConfig) time.Duration { - sleep := delay - if !cfg.Deadline.IsZero() { - remaining := time.Until(cfg.Deadline) - if remaining > 0 && sleep > remaining { - sleep = remaining - } - } - return sleep -} - -func sleepWithContext(ctx context.Context, duration time.Duration) error { - select { - case <-ctx.Done(): - return ctx.Err() - case <-time.After(duration): - return nil - } -} type BinarySource interface { GetBinary(ctx context.Context, arch string) (io.ReadCloser, error) diff --git a/pkg/agent/inject.go b/pkg/agent/inject.go index b978df01b..5f4d7ae96 100644 --- a/pkg/agent/inject.go +++ b/pkg/agent/inject.go @@ -16,6 +16,8 @@ import ( "github.com/skevetter/devpod/pkg/shell" "github.com/skevetter/devpod/pkg/version" "github.com/skevetter/log" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" ) var ( @@ -185,25 +187,32 @@ func InjectAgent(opts *InjectOptions) error { vc := newVersionChecker(opts) bm := NewBinaryManager(opts.Log, opts.DownloadURL) - return RetryWithDeadline( - opts.Ctx, - opts.Log, - RetryConfig{ - MaxAttempts: 30, - InitialDelay: 10 * time.Second, - MaxDelay: 60 * time.Second, - Deadline: time.Now().Add(opts.Timeout), - }, - func(attempt int) error { - return injectAgent(&injectContext{ - attempt: attempt, - opts: opts, - bm: bm, - vc: vc, - metrics: metrics, - }) - }, - ) + + backoff := wait.Backoff{ + Steps: 30, + Duration: 10 * time.Second, + Factor: 1.5, + Jitter: 0.1, + Cap: 60 * time.Second, + } + + attempt := 0 + return retry.OnError(backoff, func(err error) bool { + if opts.Ctx.Err() != nil { + return false + } + opts.Log.Debugf("retrying attempt %d: %v", attempt, err) + return true + }, func() error { + attempt++ + return injectAgent(&injectContext{ + attempt: attempt, + opts: opts, + bm: bm, + vc: vc, + metrics: metrics, + }) + }) } func injectLocally(opts *InjectOptions) error { From 926bd938b8298ac8b0b935263717e98bc7e3a59d Mon Sep 17 00:00:00 2001 From: Samuel K Date: Thu, 5 Feb 2026 21:08:03 +0000 Subject: [PATCH 2/2] fix: additional space Signed-off-by: Samuel K --- pkg/agent/binary.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/agent/binary.go b/pkg/agent/binary.go index a2865f601..bb003c90b 100644 --- a/pkg/agent/binary.go +++ b/pkg/agent/binary.go @@ -15,7 +15,6 @@ import ( "github.com/skevetter/log" ) - type BinarySource interface { GetBinary(ctx context.Context, arch string) (io.ReadCloser, error) SourceName() string