Skip to content
This repository was archived by the owner on Feb 23, 2026. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions pkg/containerd/opts/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,25 @@ func WithNewSnapshot(id string, i containerd.Image) containerd.NewContainerOpts
}
}

// WithRemappedSnapshot wraps `containerd.WithRemappedSnapshot`. When creating the
// snapshot fails with a not-found error, the image is unpacked for the
// container's snapshotter and the snapshot creation is attempted once more.
// Any other error is returned unchanged.
func WithRemappedSnapshot(id string, i containerd.Image, uid, gid uint32) containerd.NewContainerOpts {
	createSnapshot := containerd.WithRemappedSnapshot(id, i, uid, gid)
	return func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
		firstErr := createSnapshot(ctx, client, c)
		if firstErr == nil {
			return nil
		}
		if !errdefs.IsNotFound(firstErr) {
			return firstErr
		}

		// Not found: make sure the image is unpacked, then retry.
		if unpackErr := i.Unpack(ctx, c.Snapshotter); unpackErr != nil {
			return errors.Wrap(unpackErr, "error unpacking image")
		}
		return createSnapshot(ctx, client, c)
	}
}

// WithVolumes copies ownership of volume in rootfs to its corresponding host path.
// It doesn't update runtime spec.
// The passed in map is a host path to container path map for all volumes.
Expand Down
10 changes: 10 additions & 0 deletions pkg/containerd/opts/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,9 @@ func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, pid uint32
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(pid)}))
}
if namespaces.GetUser() != runtime.NamespaceMode_CONTAINER {
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: GetUserNamespace(pid)}))
}
return oci.Compose(opts...)
}

Expand Down Expand Up @@ -722,6 +725,8 @@ const (
utsNSFormat = "/proc/%v/ns/uts"
// pidNSFormat is the format of pid namespace of a process.
pidNSFormat = "/proc/%v/ns/pid"
// userNSFormat is the format of user namespace of a process.
userNSFormat = "/proc/%v/ns/user"
)

// GetNetworkNamespace returns the network namespace of a process.
Expand All @@ -743,3 +748,8 @@ func GetUTSNamespace(pid uint32) string {
func GetPIDNamespace(pid uint32) string {
	// Substitute the process ID into the pid namespace path format.
	nsPath := fmt.Sprintf(pidNSFormat, pid)
	return nsPath
}

// GetUserNamespace builds the user namespace path of the process identified
// by pid, by formatting pid into userNSFormat.
func GetUserNamespace(pid uint32) string {
	nsPath := fmt.Sprintf(userNSFormat, pid)
	return nsPath
}
55 changes: 39 additions & 16 deletions pkg/netns/netns.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ const nsRunDir = "/var/run/netns"

// newNS creates a new persistent (bind-mounted) network namespace and returns the
// path to the network namespace.
func newNS() (nsPath string, err error) {
// If pid is non-zero, the existing netns of that process is reused instead of
// creating a new one.
func newNS(pid uint32) (nsPath string, err error) {
b := make([]byte, 16)
if _, err := rand.Reader.Read(b); err != nil {
return "", errors.Wrap(err, "failed to generate random netns name")
Expand Down Expand Up @@ -94,26 +95,33 @@ func newNS() (nsPath string, err error) {
// Don't unlock. By not unlocking, golang will kill the OS thread when the
// goroutine is done (for go1.10+)

var origNS cnins.NetNS
origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
if err != nil {
return
}
defer origNS.Close()
var procNsPath string
if pid == 0 {
var origNS cnins.NetNS
origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
if err != nil {
return
}
defer origNS.Close()

// create a new netns on the current thread
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}
// create a new netns on the current thread
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}

// Put this thread back to the orig ns, since it might get reused (pre go1.10)
defer origNS.Set() // nolint: errcheck
// Put this thread back to the orig ns, since it might get reused (pre go1.10)
defer origNS.Set() // nolint: errcheck

procNsPath = getCurrentThreadNetNSPath()
} else {
procNsPath = getNetNSPathFromPID(pid)
}

// bind mount the netns from the current thread (from /proc) onto the
// mount point. This causes the namespace to persist, even when there
// are no threads in the ns.
err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
err = unix.Mount(procNsPath, nsPath, "none", unix.MS_BIND, "")
if err != nil {
err = errors.Wrapf(err, "failed to bind mount ns at %s", nsPath)
}
Expand Down Expand Up @@ -156,14 +164,29 @@ func getCurrentThreadNetNSPath() string {
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
}

// getNetNSPathFromPID returns the /proc path of the network namespace that
// belongs to the process identified by pid.
func getNetNSPathFromPID(pid uint32) string {
	const procNetNSFormat = "/proc/%d/ns/net"
	return fmt.Sprintf(procNetNSFormat, pid)
}

// NetNS holds a network namespace, identified by the path of its persistent
// (bind-mounted) mount point.
type NetNS struct {
// path is the network namespace path as returned by newNS.
path string
}

// NewNetNS creates a network namespace.
func NewNetNS() (*NetNS, error) {
path, err := newNS()
path, err := newNS(0)
if err != nil {
return nil, errors.Wrap(err, "failed to setup netns")
}
return &NetNS{path: path}, nil
Comment thread
mauriciovasquezbernal marked this conversation as resolved.
}

// NetNSFromPID reuses the existing network namespace of a process and bind
// mounts it in the same way as NewNetNS(), so that Do() can be run in the same
// way regardless of the lifecycle of the pid.
func NetNSFromPID(pid uint32) (*NetNS, error) {
path, err := newNS(pid)
if err != nil {
return nil, errors.Wrap(err, "failed to setup netns")
}
Expand Down
36 changes: 34 additions & 2 deletions pkg/server/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,26 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta

log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))

securityContext := config.GetLinux().GetSecurityContext()

// Pick the snapshot option that matches the requested user namespace mode.
var snapshotterOption containerd.NewContainerOpts
switch userNSMode := securityContext.GetNamespaceOptions().GetUser(); userNSMode {
case runtime.NamespaceMode_CONTAINER:
	return nil, errors.New("unsupported user namespace mode: CONTAINER")
case runtime.NamespaceMode_NODE:
	// No remapping is requested, so a regular snapshot is used.
	snapshotterOption = customopts.WithNewSnapshot(id, containerdImage)
case runtime.NamespaceMode_NODE_WIDE_REMAPPED, runtime.NamespaceMode_POD:
	// Only a plain shift starting at container ID 0 is supported; the host ID
	// of the mapping is passed as both the uid and the gid of the snapshot.
	if UsernsMapping.ContainerID != 0 {
		return nil, errors.New("unsupported uid shift")
	}
	shiftID := UsernsMapping.HostID
	snapshotterOption = customopts.WithRemappedSnapshot(id, containerdImage, shiftID, shiftID)
default:
	// Create a fresh error instead of wrapping err: err is unrelated to this
	// check, and wrapping a nil error yields nil, which would make this
	// branch return (nil, nil) for an invalid mode.
	return nil, errors.Errorf("invalid user namespace option %d for container %q", userNSMode, id)
}

// Set snapshotter before any other options.
opts := []containerd.NewContainerOpts{
containerd.WithSnapshotter(c.config.ContainerdConfig.Snapshotter),
Expand All @@ -183,7 +203,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
// the runtime (runc) a chance to modify (e.g. to create mount
// points corresponding to spec.Mounts) before making the
// rootfs readonly (requested by spec.Root.Readonly).
customopts.WithNewSnapshot(id, containerdImage),
snapshotterOption,
}

if len(volumeMounts) > 0 {
Expand Down Expand Up @@ -220,7 +240,6 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}()

var specOpts []oci.SpecOpts
securityContext := config.GetLinux().GetSecurityContext()
// Set container username. This could only be done by containerd, because it needs
// access to the container rootfs. Pass user name to containerd, and let it overwrite
// the spec for us.
Expand Down Expand Up @@ -438,6 +457,19 @@ func (c *criService) generateContainerSpec(id string, sandboxID string, sandboxP
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
)

switch securityContext.GetNamespaceOptions().GetUser() {
case runtime.NamespaceMode_NODE:
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
case runtime.NamespaceMode_NODE_WIDE_REMAPPED:
fallthrough
Comment thread
mauriciovasquezbernal marked this conversation as resolved.
case runtime.NamespaceMode_POD:
specOpts = append(specOpts, oci.WithUserNamespace(
UsernsMapping.ContainerID,
UsernsMapping.HostID,
UsernsMapping.Size,
))
}

return runtimeSpec(id, specOpts...)
}

Expand Down
19 changes: 18 additions & 1 deletion pkg/server/instrumented_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (in *instrumentedService) RunPodSandbox(ctx context.Context, r *runtime.Run
if err := in.checkInitialized(); err != nil {
return nil, err
}
log.G(ctx).Infof("RunPodsandbox for %+v", r.GetConfig().GetMetadata())
log.G(ctx).Infof("RunPodSandbox for %+v", r.GetConfig().GetMetadata())
defer func() {
if err != nil {
log.G(ctx).WithError(err).Errorf("RunPodSandbox for %+v failed, error", r.GetConfig().GetMetadata())
Expand Down Expand Up @@ -441,6 +441,23 @@ func (in *instrumentedService) Status(ctx context.Context, r *runtime.StatusRequ
return res, errdefs.ToGRPC(err)
}

// GetRuntimeConfigInfo returns the runtime config.
func (in *instrumentedService) GetRuntimeConfigInfo(ctx context.Context, r *runtime.GetRuntimeConfigInfoRequest) (res *runtime.GetRuntimeConfigInfoResponse, err error) {
if err := in.checkInitialized(); err != nil {
return nil, err
}
log.G(ctx).Tracef("GetRuntimeConfigInfo")
defer func() {
if err != nil {
log.G(ctx).WithError(err).Error("GetRuntimeConfigInfo failed")
} else {
log.G(ctx).Tracef("GetRuntimeConfigInfo returns status %+v", res.GetRuntimeConfig())
Comment thread
mauriciovasquezbernal marked this conversation as resolved.
}
}()
res, err = in.c.GetRuntimeConfigInfo(ctrdutil.WithNamespace(ctx), r)
return res, errdefs.ToGRPC(err)
}

func (in *instrumentedService) Version(ctx context.Context, r *runtime.VersionRequest) (res *runtime.VersionResponse, err error) {
if err := in.checkInitialized(); err != nil {
return nil, err
Expand Down
60 changes: 60 additions & 0 deletions pkg/server/runtime_config_info.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
Copyright 2017-2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package server

import (
"golang.org/x/net/context"

runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)

// UsernsMapping is the ID mapping applied to containers that run in a
// remapped user namespace: Size IDs starting at ContainerID inside the
// container correspond to host IDs starting at HostID.
var UsernsMapping = &runtimespec.LinuxIDMapping{
	ContainerID: 0,
	HostID:      100000,
	Size:        65535,
}

// GetRuntimeConfigInfo returns the active runtime configuration, which
// consists of the user namespace ID mappings.
//
// The reported UID and GID mappings are both built from UsernsMapping, the
// same mapping that is applied when containers are created, so the reported
// values cannot drift away from the ones in use. The request r is not
// consulted, and the returned error is always nil.
func (c *criService) GetRuntimeConfigInfo(_ context.Context, r *runtime.GetRuntimeConfigInfoRequest) (res *runtime.GetRuntimeConfigInfoResponse, err error) {
	// For reference: when only the host user namespace is used, the mapping is
	// the one shown in /proc/self/uid_map on the host, i.e. container ID 0,
	// host ID 0, size 4294967295.

	// newMapping returns a fresh copy so the UID and GID lists do not share
	// the same element.
	newMapping := func() *runtime.LinuxIDMapping {
		return &runtime.LinuxIDMapping{
			ContainerId: UsernsMapping.ContainerID,
			HostId:      UsernsMapping.HostID,
			Size_:       UsernsMapping.Size,
		}
	}

	linuxConfig := &runtime.LinuxUserNamespaceConfig{
		UidMappings: []*runtime.LinuxIDMapping{newMapping()},
		GidMappings: []*runtime.LinuxIDMapping{newMapping()},
	}
	activeRuntimeConfig := &runtime.ActiveRuntimeConfig{UserNamespaceConfig: linuxConfig}
	return &runtime.GetRuntimeConfigInfoResponse{RuntimeConfig: activeRuntimeConfig}, nil
}
Loading