Skip to content

Commit 8403ffd

Browse files
committed
[shimV2] implements task, shimdiag service and wires up pod/container controllers
Implements the full TaskService and ShimDiag service methods for the LCOW containerd shim (containerd-shim-lcow-v2), replacing the previous `ErrNotImplemented` stubs with working lifecycle management. Signed-off-by: Harsh Rawat <harshrawat@microsoft.com>
1 parent 799fe74 commit 8403ffd

File tree

11 files changed

+831
-78
lines changed

11 files changed

+831
-78
lines changed

cmd/containerd-shim-lcow-v2/main.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build windows
1+
//go:build windows && lcow
22

33
// containerd-shim-lcow-v2 is a containerd shim implementation for Linux Containers on Windows (LCOW).
44
package main
@@ -16,6 +16,7 @@ import (
1616
"github.com/Microsoft/hcsshim/internal/log"
1717
"github.com/Microsoft/hcsshim/internal/oc"
1818
"github.com/Microsoft/hcsshim/internal/shim"
19+
"github.com/Microsoft/hcsshim/osversion"
1920

2021
"github.com/containerd/errdefs"
2122
"github.com/sirupsen/logrus"
@@ -42,6 +43,13 @@ func main() {
4243
os.Exit(1)
4344
}
4445

46+
// This shim is supported on Windows Build 26100 and later.
47+
if osversion.Build() < osversion.V25H1Server {
48+
_, _ = fmt.Fprintf(os.Stderr,
49+
"%s: Windows version [%v] is not supported", service.ShimName, osversion.Build())
50+
os.Exit(1)
51+
}
52+
4553
// Start the shim manager event loop. The manager is responsible for
4654
// handling containerd start/stop lifecycle calls for the shim process.
4755
shim.Run(context.Background(), newShimManager(service.ShimName), func(c *shim.Config) {

cmd/containerd-shim-lcow-v2/manager.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build windows
1+
//go:build windows && lcow
22

33
package main
44

@@ -217,19 +217,19 @@ func (m *shimManager) Stop(_ context.Context, id string) (resp shim.StopStatus,
217217
if sys, _ := hcs.OpenComputeSystem(ctx, id); sys != nil {
218218
defer sys.Close()
219219
if err := sys.Terminate(ctx); err != nil {
220-
fmt.Fprintf(os.Stderr, "failed to terminate '%s': %v", id, err)
220+
fmt.Fprintf(os.Stderr, "failed to terminate %q: %v", id, err)
221221
} else {
222222
ch := make(chan error, 1)
223223
go func() { ch <- sys.Wait() }()
224224
t := time.NewTimer(time.Second * 30)
225225
select {
226226
case <-t.C:
227227
sys.Close()
228-
return resp, fmt.Errorf("timed out waiting for '%s' to terminate", id)
228+
return resp, fmt.Errorf("timed out waiting for %q to terminate", id)
229229
case err := <-ch:
230230
t.Stop()
231231
if err != nil {
232-
fmt.Fprintf(os.Stderr, "failed to wait for '%s' to terminate: %v", id, err)
232+
fmt.Fprintf(os.Stderr, "failed to wait for %q to terminate: %v", id, err)
233233
}
234234
}
235235
}
@@ -249,20 +249,20 @@ func (m *shimManager) Stop(_ context.Context, id string) (resp shim.StopStatus,
249249
func limitedRead(filePath string, readLimitBytes int64) ([]byte, error) {
250250
f, err := os.Open(filePath)
251251
if err != nil {
252-
return nil, fmt.Errorf("limited read failed to open file: %s: %w", filePath, err)
252+
return nil, fmt.Errorf("open file %s: %w", filePath, err)
253253
}
254254
defer f.Close()
255255
fi, err := f.Stat()
256256
if err != nil {
257-
return []byte{}, fmt.Errorf("limited read failed during file stat: %s: %w", filePath, err)
257+
return []byte{}, fmt.Errorf("stat file %s: %w", filePath, err)
258258
}
259259
if fi.Size() < readLimitBytes {
260260
readLimitBytes = fi.Size()
261261
}
262262
buf := make([]byte, readLimitBytes)
263263
_, err = f.Read(buf)
264264
if err != nil {
265-
return []byte{}, fmt.Errorf("limited read failed during file read: %s: %w", filePath, err)
265+
return []byte{}, fmt.Errorf("read file %s: %w", filePath, err)
266266
}
267267
return buf, nil
268268
}

cmd/containerd-shim-lcow-v2/manager_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build windows
1+
//go:build windows && lcow
22

33
package main
44

cmd/containerd-shim-lcow-v2/service/plugin/plugin.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build windows
1+
//go:build windows && lcow
22

33
package plugin
44

@@ -7,6 +7,7 @@ import (
77
"os"
88

99
"github.com/Microsoft/hcsshim/cmd/containerd-shim-lcow-v2/service"
10+
"github.com/Microsoft/hcsshim/internal/logfields"
1011
"github.com/Microsoft/hcsshim/internal/shim"
1112
"github.com/Microsoft/hcsshim/internal/shimdiag"
1213
hcsversion "github.com/Microsoft/hcsshim/internal/version"
@@ -30,7 +31,7 @@ const (
3031
var svc *service.Service
3132

3233
func init() {
33-
// Provider ID: 64F6FC7F-8326-5EE8-B890-3734AE584136
34+
// Provider ID: A6BD4B70-8A0B-5913-5C8E-E2780DC7F06F
3435
// Provider and hook aren't closed explicitly, as they will exist until process exit.
3536
provider, err := etw.NewProvider(etwProviderName, etwCallback)
3637
if err != nil {
@@ -106,7 +107,7 @@ func etwCallback(sourceID guid.GUID, state etw.ProviderState, level etw.Level, m
106107
if err != nil {
107108
return
108109
}
109-
log := logrus.WithField("sandboxID", svc.SandboxID())
110+
log := logrus.WithField(logfields.SandboxID, svc.SandboxID())
110111
log.WithField("stack", resp.Stacks).Info("goroutine stack dump")
111112
if resp.GuestStacks != "" {
112113
log.WithField("stack", resp.GuestStacks).Info("guest stack dump")

cmd/containerd-shim-lcow-v2/service/service.go

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,25 @@
1-
//go:build windows
1+
//go:build windows && lcow
22

33
package service
44

55
import (
66
"context"
7+
"fmt"
78
"sync"
89

910
"github.com/Microsoft/hcsshim/internal/builder/vm/lcow"
11+
"github.com/Microsoft/hcsshim/internal/controller/pod"
1012
"github.com/Microsoft/hcsshim/internal/controller/vm"
1113
"github.com/Microsoft/hcsshim/internal/log"
1214
"github.com/Microsoft/hcsshim/internal/shim"
1315
"github.com/Microsoft/hcsshim/internal/shimdiag"
1416

1517
sandboxsvc "github.com/containerd/containerd/api/runtime/sandbox/v1"
16-
tasksvc "github.com/containerd/containerd/api/runtime/task/v3"
18+
tasksvc "github.com/containerd/containerd/api/runtime/task/v2"
1719
"github.com/containerd/containerd/v2/core/runtime"
1820
"github.com/containerd/containerd/v2/pkg/namespaces"
1921
"github.com/containerd/containerd/v2/pkg/shutdown"
22+
"github.com/containerd/errdefs"
2023
"github.com/containerd/ttrpc"
2124
)
2225

@@ -29,7 +32,7 @@ const (
2932
// All Service methods (sandbox, task, and shimdiag) operate on this shared struct.
3033
type Service struct {
3134
// mu is used to synchronize access to shared state within the Service.
32-
mu sync.Mutex
35+
mu sync.RWMutex
3336

3437
// publisher is used to publish events from the shim to containerd.
3538
publisher shim.Publisher
@@ -45,7 +48,15 @@ type Service struct {
4548
sandboxOptions *lcow.SandboxOptions
4649

4750
// vmController is responsible for managing the lifecycle of the underlying utility VM and its associated resources.
48-
vmController vm.Controller
51+
vmController *vm.Controller
52+
53+
// podControllers maps podID -> PodController for each active pod.
54+
podControllers map[string]*pod.Controller
55+
56+
// containerPodMapping maps containerID -> podID, allowing callers to look up
57+
// which pod a container belongs to and then retrieve the corresponding controller
58+
// from podControllers.
59+
containerPodMapping map[string]string
4960

5061
// shutdown manages graceful shutdown operations and allows registration of cleanup callbacks.
5162
shutdown shutdown.Service
@@ -56,10 +67,12 @@ var _ shim.TTRPCService = (*Service)(nil)
5667
// NewService creates a new instance of the Service with the shared state.
5768
func NewService(ctx context.Context, eventsPublisher shim.Publisher, sd shutdown.Service) *Service {
5869
svc := &Service{
59-
publisher: eventsPublisher,
60-
events: make(chan interface{}, 128), // Buffered channel for events
61-
vmController: vm.NewController(),
62-
shutdown: sd,
70+
publisher: eventsPublisher,
71+
events: make(chan interface{}, 128), // Buffered channel for events
72+
vmController: vm.New(),
73+
podControllers: make(map[string]*pod.Controller),
74+
containerPodMapping: make(map[string]string),
75+
shutdown: sd,
6376
}
6477

6578
go svc.forward(ctx, eventsPublisher)
@@ -83,23 +96,27 @@ func NewService(ctx context.Context, eventsPublisher shim.Publisher, sd shutdown
8396
// RegisterTTRPC registers the Task, Sandbox, and ShimDiag TTRPC services on
8497
// the provided server so that containerd can call into the shim over TTRPC.
8598
func (s *Service) RegisterTTRPC(server *ttrpc.Server) error {
86-
tasksvc.RegisterTTRPCTaskService(server, s)
99+
tasksvc.RegisterTaskService(server, s)
87100
sandboxsvc.RegisterTTRPCSandboxService(server, s)
88101
shimdiag.RegisterShimDiagService(server, s)
89102
return nil
90103
}
91104

105+
// ensureVMRunning returns an error if the VM is not in the running state.
106+
func (s *Service) ensureVMRunning() error {
107+
if state := s.vmController.State(); state != vm.StateRunning {
108+
return fmt.Errorf("vm is not running (state: %s): %w", state, errdefs.ErrFailedPrecondition)
109+
}
110+
return nil
111+
}
112+
92113
// SandboxID returns the unique identifier for the sandbox managed by this Service.
93114
func (s *Service) SandboxID() string {
94115
return s.sandboxID
95116
}
96117

97118
// send enqueues an event onto the internal events channel so that it can be
98119
// forwarded to containerd asynchronously by the forward goroutine.
99-
//
100-
// TODO: wire up send() for task events once task lifecycle methods are implemented.
101-
//
102-
//nolint:unused
103120
func (s *Service) send(evt interface{}) {
104121
s.events <- evt
105122
}

cmd/containerd-shim-lcow-v2/service/service_sandbox.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build windows
1+
//go:build windows && lcow
22

33
package service
44

cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build windows
1+
//go:build windows && lcow
22

33
package service
44

@@ -124,7 +124,7 @@ func (s *Service) startSandboxInternal(ctx context.Context, request *sandbox.Sta
124124
}
125125
}
126126

127-
// VM controller ensures that only once of the Start call goes through.
127+
// VM controller ensures that only one of the Start calls goes through.
128128
err := s.vmController.StartVM(ctx, &vm.StartOptions{
129129
GCSServiceID: winio.VsockServiceID(prot.LinuxGcsVsockPort),
130130
ConfidentialOptions: confidentialOpts,
@@ -264,7 +264,7 @@ func (s *Service) pingSandboxInternal(_ context.Context, _ *sandbox.PingRequest)
264264
// The sandbox must already be in the stopped state before shutdown is accepted.
265265
func (s *Service) shutdownSandboxInternal(ctx context.Context, request *sandbox.ShutdownSandboxRequest) (*sandbox.ShutdownSandboxResponse, error) {
266266
if s.sandboxID != request.SandboxID {
267-
return &sandbox.ShutdownSandboxResponse{}, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID)
267+
return nil, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID)
268268
}
269269

270270
// Ensure the VM is terminated. If the VM is already terminated,
@@ -298,17 +298,17 @@ func (s *Service) shutdownSandboxInternal(ctx context.Context, request *sandbox.
298298
// It collects and returns runtime statistics from the vmController.
299299
func (s *Service) sandboxMetricsInternal(ctx context.Context, request *sandbox.SandboxMetricsRequest) (*sandbox.SandboxMetricsResponse, error) {
300300
if s.sandboxID != request.SandboxID {
301-
return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID)
301+
return nil, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID)
302302
}
303303

304304
stats, err := s.vmController.Stats(ctx)
305305
if err != nil {
306-
return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("failed to get sandbox metrics: %w", err)
306+
return nil, fmt.Errorf("failed to get sandbox metrics: %w", err)
307307
}
308308

309309
anyStat, err := typeurl.MarshalAny(stats)
310310
if err != nil {
311-
return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("failed to marshal sandbox metrics: %w", err)
311+
return nil, fmt.Errorf("failed to marshal sandbox metrics: %w", err)
312312
}
313313

314314
return &sandbox.SandboxMetricsResponse{

cmd/containerd-shim-lcow-v2/service/service_shimdiag.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build windows
1+
//go:build windows && lcow
22

33
package service
44

@@ -7,11 +7,13 @@ import (
77
"os"
88
"strings"
99

10+
"github.com/Microsoft/hcsshim/internal/log"
1011
"github.com/Microsoft/hcsshim/internal/logfields"
1112
"github.com/Microsoft/hcsshim/internal/oc"
1213
"github.com/Microsoft/hcsshim/internal/shimdiag"
1314

1415
"github.com/containerd/errdefs/pkg/errgrpc"
16+
"github.com/sirupsen/logrus"
1517
"go.opencensus.io/trace"
1618
)
1719

@@ -34,6 +36,9 @@ func (s *Service) DiagExecInHost(ctx context.Context, request *shimdiag.ExecProc
3436
trace.StringAttribute(logfields.Stdout, request.Stdout),
3537
trace.StringAttribute(logfields.Stderr, request.Stderr))
3638

39+
// Set the sandbox ID in the logger context for all subsequent logs in this request.
40+
ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, s.sandboxID))
41+
3742
r, e := s.diagExecInHostInternal(ctx, request)
3843
return r, errgrpc.ToGRPC(e)
3944
}
@@ -49,6 +54,9 @@ func (s *Service) DiagTasks(ctx context.Context, request *shimdiag.TasksRequest)
4954
trace.StringAttribute(logfields.SandboxID, s.sandboxID),
5055
trace.BoolAttribute(logfields.Execs, request.Execs))
5156

57+
// Set the sandbox ID in the logger context for all subsequent logs in this request.
58+
ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, s.sandboxID))
59+
5260
r, e := s.diagTasksInternal(ctx, request)
5361
return r, errgrpc.ToGRPC(e)
5462
}
@@ -66,20 +74,26 @@ func (s *Service) DiagShare(ctx context.Context, request *shimdiag.ShareRequest)
6674
trace.StringAttribute(logfields.UVMPath, request.UvmPath),
6775
trace.BoolAttribute(logfields.ReadOnly, request.ReadOnly))
6876

77+
// Set the sandbox ID in the logger context for all subsequent logs in this request.
78+
ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, s.sandboxID))
79+
6980
r, e := s.diagShareInternal(ctx, request)
7081
return r, errgrpc.ToGRPC(e)
7182
}
7283

7384
// DiagStacks returns the stack traces of all goroutines in the shim.
7485
// This method is part of the instrumentation layer and business logic is included in diagStacksInternal.
75-
func (s *Service) DiagStacks(ctx context.Context, request *shimdiag.StacksRequest) (resp *shimdiag.StacksResponse, err error) {
86+
func (s *Service) DiagStacks(ctx context.Context, _ *shimdiag.StacksRequest) (resp *shimdiag.StacksResponse, err error) {
7687
ctx, span := oc.StartSpan(ctx, "DiagStacks")
7788
defer span.End()
7889
defer func() { oc.SetSpanStatus(span, err) }()
7990

8091
span.AddAttributes(trace.StringAttribute(logfields.SandboxID, s.sandboxID))
8192

82-
r, e := s.diagStacksInternal(ctx, request)
93+
// Set the sandbox ID in the logger context for all subsequent logs in this request.
94+
ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, s.sandboxID))
95+
96+
r, e := s.diagStacksInternal(ctx)
8397
return r, errgrpc.ToGRPC(e)
8498
}
8599

0 commit comments

Comments
 (0)