Skip to content

Commit 714cb94

Browse files
authored
refactor(agent): replace custom retry logic with Kubernetes backoff strategy (#446)
Signed-off-by: Samuel K <skevetter@pm.me>
1 parent 8b91304 commit 714cb94

2 files changed

Lines changed: 28 additions & 143 deletions

File tree

pkg/agent/binary.go

Lines changed: 0 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -10,135 +10,11 @@ import (
1010
"path/filepath"
1111
"runtime"
1212
"strings"
13-
"time"
1413

1514
devpodhttp "github.com/skevetter/devpod/pkg/http"
1615
"github.com/skevetter/log"
1716
)
1817

19-
type RetryConfig struct {
20-
MaxAttempts int
21-
InitialDelay time.Duration
22-
MaxDelay time.Duration
23-
Deadline time.Time
24-
}
25-
26-
type RetryFunc func(attempt int) error
27-
28-
func RetryWithDeadline(
29-
ctx context.Context,
30-
log log.Logger,
31-
cfg RetryConfig,
32-
fn RetryFunc,
33-
) error {
34-
cfg.applyDefaults()
35-
delay := cfg.InitialDelay
36-
37-
for attempt := 1; attempt <= cfg.MaxAttempts; attempt++ {
38-
if err := cfg.checkPreConditions(ctx, attempt-1); err != nil {
39-
return err
40-
}
41-
42-
err := fn(attempt)
43-
if err == nil {
44-
return nil
45-
}
46-
47-
if attempt == cfg.MaxAttempts {
48-
return fmt.Errorf("agent injection failed after %d attempts: %w", attempt, err)
49-
}
50-
51-
delay = cfg.handleRetry(&retryContext{
52-
ctx: ctx,
53-
log: log,
54-
attempt: attempt,
55-
err: err,
56-
delay: delay,
57-
})
58-
if delay == 0 {
59-
return ctx.Err()
60-
}
61-
}
62-
63-
return fmt.Errorf("retry loop exited unexpectedly")
64-
}
65-
66-
func (cfg *RetryConfig) checkPreConditions(ctx context.Context, attemptsCompleted int) error {
67-
if err := cfg.checkDeadline(attemptsCompleted); err != nil {
68-
return err
69-
}
70-
return checkContextCancelled(ctx)
71-
}
72-
73-
type retryContext struct {
74-
ctx context.Context
75-
log log.Logger
76-
attempt int
77-
err error
78-
delay time.Duration
79-
}
80-
81-
func (cfg *RetryConfig) handleRetry(rctx *retryContext) time.Duration {
82-
sleep := calculateSleep(rctx.delay, cfg)
83-
84-
rctx.log.Debugf("retrying attempt %d after %v: %v", rctx.attempt, sleep, rctx.err)
85-
86-
if err := sleepWithContext(rctx.ctx, sleep); err != nil {
87-
return 0
88-
}
89-
90-
newDelay := rctx.delay * 2
91-
return min(newDelay, cfg.MaxDelay)
92-
}
93-
94-
func (cfg *RetryConfig) applyDefaults() {
95-
if cfg.MaxAttempts <= 0 {
96-
cfg.MaxAttempts = 1
97-
}
98-
if cfg.InitialDelay <= 0 {
99-
cfg.InitialDelay = time.Second
100-
}
101-
if cfg.MaxDelay <= 0 {
102-
cfg.MaxDelay = 30 * time.Second
103-
}
104-
}
105-
106-
func (cfg *RetryConfig) checkDeadline(attemptsCompleted int) error {
107-
if cfg.Deadline.IsZero() || !time.Now().After(cfg.Deadline) {
108-
return nil
109-
}
110-
return fmt.Errorf("%w after %d attempts", ErrInjectTimeout, attemptsCompleted)
111-
}
112-
113-
func checkContextCancelled(ctx context.Context) error {
114-
select {
115-
case <-ctx.Done():
116-
return ctx.Err()
117-
default:
118-
return nil
119-
}
120-
}
121-
122-
func calculateSleep(delay time.Duration, cfg *RetryConfig) time.Duration {
123-
sleep := delay
124-
if !cfg.Deadline.IsZero() {
125-
remaining := time.Until(cfg.Deadline)
126-
if remaining > 0 && sleep > remaining {
127-
sleep = remaining
128-
}
129-
}
130-
return sleep
131-
}
132-
133-
func sleepWithContext(ctx context.Context, duration time.Duration) error {
134-
select {
135-
case <-ctx.Done():
136-
return ctx.Err()
137-
case <-time.After(duration):
138-
return nil
139-
}
140-
}
141-
14218
type BinarySource interface {
14319
GetBinary(ctx context.Context, arch string) (io.ReadCloser, error)
14420
SourceName() string

pkg/agent/inject.go

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import (
1616
"github.com/skevetter/devpod/pkg/shell"
1717
"github.com/skevetter/devpod/pkg/version"
1818
"github.com/skevetter/log"
19+
"k8s.io/apimachinery/pkg/util/wait"
20+
"k8s.io/client-go/util/retry"
1921
)
2022

2123
var (
@@ -185,25 +187,32 @@ func InjectAgent(opts *InjectOptions) error {
185187

186188
vc := newVersionChecker(opts)
187189
bm := NewBinaryManager(opts.Log, opts.DownloadURL)
188-
return RetryWithDeadline(
189-
opts.Ctx,
190-
opts.Log,
191-
RetryConfig{
192-
MaxAttempts: 30,
193-
InitialDelay: 10 * time.Second,
194-
MaxDelay: 60 * time.Second,
195-
Deadline: time.Now().Add(opts.Timeout),
196-
},
197-
func(attempt int) error {
198-
return injectAgent(&injectContext{
199-
attempt: attempt,
200-
opts: opts,
201-
bm: bm,
202-
vc: vc,
203-
metrics: metrics,
204-
})
205-
},
206-
)
190+
191+
backoff := wait.Backoff{
192+
Steps: 30,
193+
Duration: 10 * time.Second,
194+
Factor: 1.5,
195+
Jitter: 0.1,
196+
Cap: 60 * time.Second,
197+
}
198+
199+
attempt := 0
200+
return retry.OnError(backoff, func(err error) bool {
201+
if opts.Ctx.Err() != nil {
202+
return false
203+
}
204+
opts.Log.Debugf("retrying attempt %d: %v", attempt, err)
205+
return true
206+
}, func() error {
207+
attempt++
208+
return injectAgent(&injectContext{
209+
attempt: attempt,
210+
opts: opts,
211+
bm: bm,
212+
vc: vc,
213+
metrics: metrics,
214+
})
215+
})
207216
}
208217

209218
func injectLocally(opts *InjectOptions) error {

0 commit comments

Comments
 (0)