Skip to content
This repository was archived by the owner on Mar 9, 2022. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cluster/gce/cloud-init/master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ write_files:
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
SupplementaryGroups=0
ExecStartPre=/sbin/modprobe overlay
ExecStart=/home/containerd/usr/local/bin/containerd

Expand Down
1 change: 1 addition & 0 deletions cluster/gce/cloud-init/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ write_files:
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
SupplementaryGroups=0
ExecStartPre=/sbin/modprobe overlay
ExecStart=/home/containerd/usr/local/bin/containerd

Expand Down
1 change: 1 addition & 0 deletions contrib/systemd-units/containerd.service
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ LimitNOFILE=1048576
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
SupplementaryGroups=0

[Install]
WantedBy=multi-user.target
27 changes: 27 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,15 @@ type RegistryConfig struct {
TLS *TLSConfig `toml:"tls" json:"tls"`
}

// LinuxIDMapping describes a single contiguous range of IDs mapped between a
// container and the host.
// NOTE: other code initializes this struct positionally
// (ContainerID, HostID, Size), so the field order must not change.
type LinuxIDMapping struct {
// ContainerID (toml: container_id) is the starting id for the mapping inside the container.
ContainerID uint32 `toml:"container_id" json:"containerID"`
// HostID (toml: host_id) is the starting id for the mapping on the host.
HostID uint32 `toml:"host_id" json:"hostID"`
// Size (toml: size) is the length of the mapping.
Size uint32 `toml:"size" json:"size"`
}

// PluginConfig contains toml config related to CRI plugin,
// it is a subset of Config.
type PluginConfig struct {
Expand Down Expand Up @@ -204,6 +213,10 @@ type PluginConfig struct {
// DisableProcMount disables Kubernetes ProcMount support. This MUST be set to `true`
// when using containerd with Kubernetes <=1.11.
DisableProcMount bool `toml:"disable_proc_mount" json:"disableProcMount"`
// NodeWideUIDMapping is the node-wide UID mapping applied to containers when NamespaceOption.User = POD
NodeWideUIDMapping LinuxIDMapping `toml:"node_wide_uid_mapping" json:"nodeWideUIDMapping"`
// NodeWideGIDMapping is the node-wide GID mapping applied to containers when NamespaceOption.User = POD
NodeWideGIDMapping LinuxIDMapping `toml:"node_wide_gid_mapping" json:"nodeWideGIDMapping"`
}

// X509KeyPairStreaming contains the x509 configuration for streaming
Expand Down Expand Up @@ -271,6 +284,8 @@ func DefaultConfig() PluginConfig {
},
MaxConcurrentDownloads: 3,
DisableProcMount: false,
NodeWideUIDMapping: LinuxIDMapping{0, 0, 4294967295},
NodeWideGIDMapping: LinuxIDMapping{0, 0, 4294967295},
}
}

Expand Down Expand Up @@ -359,5 +374,17 @@ func ValidatePluginConfig(ctx context.Context, c *PluginConfig) error {
return errors.Wrap(err, "invalid stream idle timeout")
}
}

// There should be a root in the container
if c.NodeWideUIDMapping.ContainerID != 0 || c.NodeWideGIDMapping.ContainerID != 0 {
return errors.New("missing root id in container")
}

// UID and GID mapping should be the same on containerd/cri 1.3.
// This can be revisited when oci.WithUserNamespace() gets support for different mappings.
// See https://github.com/containerd/containerd/commit/51a6813c06030ae2b3fcf9ec068e4b39cd2d1e69
if c.NodeWideUIDMapping != c.NodeWideGIDMapping {
return errors.New("different mappings for uid and gid not yet supported")
}
return nil
}
56 changes: 56 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,62 @@ func TestValidateConfig(t *testing.T) {
},
expectedErr: "invalid stream idle timeout",
},
"valid id mapping": {
config: &PluginConfig{
ContainerdConfig: ContainerdConfig{
DefaultRuntimeName: RuntimeDefault,
Runtimes: map[string]Runtime{
RuntimeDefault: {
Type: plugin.RuntimeLinuxV1,
},
},
},
NodeWideUIDMapping: LinuxIDMapping{0, 800000, 65536},
NodeWideGIDMapping: LinuxIDMapping{0, 800000, 65536},
},
expected: &PluginConfig{
ContainerdConfig: ContainerdConfig{
DefaultRuntimeName: RuntimeDefault,
Runtimes: map[string]Runtime{
RuntimeDefault: {
Type: plugin.RuntimeLinuxV1,
},
},
},
NodeWideUIDMapping: LinuxIDMapping{0, 800000, 65536},
NodeWideGIDMapping: LinuxIDMapping{0, 800000, 65536},
},
},
"invalid id mapping": {
config: &PluginConfig{
ContainerdConfig: ContainerdConfig{
DefaultRuntimeName: RuntimeDefault,
Runtimes: map[string]Runtime{
RuntimeDefault: {
Type: plugin.RuntimeLinuxV1,
},
},
},
NodeWideUIDMapping: LinuxIDMapping{1, 100000, 65536},
NodeWideGIDMapping: LinuxIDMapping{1, 100000, 65536},
},
expectedErr: "missing root id in container",
},
"different uid and gid mappings": {
config: &PluginConfig{
ContainerdConfig: ContainerdConfig{
DefaultRuntimeName: RuntimeDefault,
Runtimes: map[string]Runtime{
RuntimeDefault: {
Type: plugin.RuntimeLinuxV1,
},
},
},
NodeWideUIDMapping: LinuxIDMapping{0, 100000, 65536},
NodeWideGIDMapping: LinuxIDMapping{0, 200000, 65536},
},
expectedErr: "different mappings for uid and gid not yet supported",
},
} {
t.Run(desc, func(t *testing.T) {
err := ValidatePluginConfig(context.Background(), test.config)
Expand Down
19 changes: 19 additions & 0 deletions pkg/containerd/opts/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,25 @@ func WithNewSnapshot(id string, i containerd.Image) containerd.NewContainerOpts
}
}

// WithRemappedSnapshot wraps `containerd.WithRemappedSnapshot`. When creating the
// snapshot fails with a not-found error, the image is unpacked into the
// container's snapshotter and snapshot creation is retried once. Any other
// error is returned unchanged.
func WithRemappedSnapshot(id string, i containerd.Image, uid, gid uint32) containerd.NewContainerOpts {
	createSnapshot := containerd.WithRemappedSnapshot(id, i, uid, gid)
	return func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
		err := createSnapshot(ctx, client, c)
		if err == nil {
			return nil
		}
		// Only a not-found failure is worth retrying after an unpack.
		if !errdefs.IsNotFound(err) {
			return err
		}

		if unpackErr := i.Unpack(ctx, c.Snapshotter); unpackErr != nil {
			return errors.Wrap(unpackErr, "error unpacking image")
		}
		return createSnapshot(ctx, client, c)
	}
}

// WithVolumes copies ownership of volume in rootfs to its corresponding host path.
// It doesn't update runtime spec.
// The passed in map is a host path to container path map for all volumes.
Expand Down
7 changes: 7 additions & 0 deletions pkg/containerd/opts/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,8 @@ const (
utsNSFormat = "/proc/%v/ns/uts"
// pidNSFormat is the format of pid namespace of a process.
pidNSFormat = "/proc/%v/ns/pid"
// userNSFormat is the format of user namespace of a process.
userNSFormat = "/proc/%v/ns/user"
)

// GetNetworkNamespace returns the network namespace of a process.
Expand All @@ -743,3 +745,8 @@ func GetUTSNamespace(pid uint32) string {
func GetPIDNamespace(pid uint32) string {
	// Fill the pid into the pid-namespace path template.
	pidNSPath := fmt.Sprintf(pidNSFormat, pid)
	return pidNSPath
}

// GetUserNamespace builds the path of the user namespace belonging to the
// process identified by pid.
func GetUserNamespace(pid uint32) string {
	userNSPath := fmt.Sprintf(userNSFormat, pid)
	return userNSPath
}
51 changes: 35 additions & 16 deletions pkg/netns/netns.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ const nsRunDir = "/var/run/netns"

// newNS creates a new persistent (bind-mounted) network namespace and returns the
// path to the network namespace.
func newNS() (nsPath string, err error) {
// If pid is non-zero, the network namespace of that process is reused instead of creating a new one.
func newNS(pid uint32) (nsPath string, err error) {
b := make([]byte, 16)
if _, err := rand.Reader.Read(b); err != nil {
return "", errors.Wrap(err, "failed to generate random netns name")
Expand Down Expand Up @@ -94,26 +95,33 @@ func newNS() (nsPath string, err error) {
// Don't unlock. By not unlocking, golang will kill the OS thread when the
// goroutine is done (for go1.10+)

var origNS cnins.NetNS
origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
if err != nil {
return
}
defer origNS.Close()
var procNsPath string
if pid == 0 {
var origNS cnins.NetNS
origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
if err != nil {
return
}
defer origNS.Close()

// create a new netns on the current thread
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}
// create a new netns on the current thread
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}

// Put this thread back to the orig ns, since it might get reused (pre go1.10)
defer origNS.Set() // nolint: errcheck
// Put this thread back to the orig ns, since it might get reused (pre go1.10)
defer origNS.Set() // nolint: errcheck

procNsPath = getCurrentThreadNetNSPath()
} else {
procNsPath = getNetNSPathFromPID(pid)
}

// bind mount the netns from the current thread (from /proc) onto the
// mount point. This causes the namespace to persist, even when there
// are no threads in the ns.
err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
err = unix.Mount(procNsPath, nsPath, "none", unix.MS_BIND, "")
if err != nil {
err = errors.Wrapf(err, "failed to bind mount ns at %s", nsPath)
}
Expand Down Expand Up @@ -156,14 +164,25 @@ func getCurrentThreadNetNSPath() string {
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
}

// getNetNSPathFromPID returns the /proc path of the network namespace of the
// process identified by pid.
func getNetNSPathFromPID(pid uint32) string {
	const netNSFormat = "/proc/%d/ns/net"
	return fmt.Sprintf(netNSFormat, pid)
}

// NetNS holds network namespace.
type NetNS struct {
// path is the filesystem path of the network namespace; presumably the
// bind-mount path returned by newNS — confirm against the constructor.
path string
}

// NewNetNS creates a network namespace.
// It delegates to NetNSFromPID with pid 0, the value for which newNS unshares
// a fresh namespace rather than reusing the one of an existing process.
func NewNetNS() (*NetNS, error) {
	return NetNSFromPID(0)
}

// NetNSFromPID reuses the existing network namespace from a process and bind
// mount it in the same way as NewNetNS() so we can run Do() in the same way
// regardless of the lifecycle of the pid.
func NetNSFromPID(pid uint32) (*NetNS, error) {
path, err := newNS(pid)
if err != nil {
return nil, errors.Wrap(err, "failed to setup netns")
}
Expand Down
36 changes: 34 additions & 2 deletions pkg/server/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,22 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta

log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))

securityContext := config.GetLinux().GetSecurityContext()

// Choose how the container snapshot is created based on the requested user
// namespace mode.
var snapshotterOption containerd.NewContainerOpts
switch securityContext.GetNamespaceOptions().GetUser() {
case runtime.NamespaceMode_CONTAINER:
	// Per-container user namespaces are rejected outright.
	return nil, errors.New("unsupported user namespace mode: CONTAINER")
case runtime.NamespaceMode_NODE:
	// No ID remapping is requested in NODE mode, so a plain snapshot is used.
	snapshotterOption = customopts.WithNewSnapshot(id, containerdImage)
case runtime.NamespaceMode_POD:
	// Remap snapshot ownership by the offset between the host and the
	// container side of the configured node-wide mappings.
	snapshotterOption = customopts.WithRemappedSnapshot(id, containerdImage,
		c.config.NodeWideUIDMapping.HostID-c.config.NodeWideUIDMapping.ContainerID,
		c.config.NodeWideGIDMapping.HostID-c.config.NodeWideGIDMapping.ContainerID)
default:
	// Build a fresh error instead of wrapping `err`: that variable belongs to
	// an earlier, unrelated call (presumably nil at this point), and
	// errors.Wrapf returns nil for a nil error, which would hide this failure.
	return nil, errors.Errorf("invalid user namespace option %d for container %q", securityContext.GetNamespaceOptions().GetUser(), id)
}

// Set snapshotter before any other options.
opts := []containerd.NewContainerOpts{
containerd.WithSnapshotter(c.config.ContainerdConfig.Snapshotter),
Expand All @@ -183,7 +199,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
// the runtime (runc) a chance to modify (e.g. to create mount
// points corresponding to spec.Mounts) before making the
// rootfs readonly (requested by spec.Root.Readonly).
customopts.WithNewSnapshot(id, containerdImage),
snapshotterOption,
}

if len(volumeMounts) > 0 {
Expand Down Expand Up @@ -220,7 +236,6 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}()

var specOpts []oci.SpecOpts
securityContext := config.GetLinux().GetSecurityContext()
// Set container username. This could only be done by containerd, because it needs
// access to the container rootfs. Pass user name to containerd, and let it overwrite
// the spec for us.
Expand Down Expand Up @@ -438,6 +453,23 @@ func (c *criService) generateContainerSpec(id string, sandboxID string, sandboxP
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
)

switch securityContext.GetNamespaceOptions().GetUser() {
case runtime.NamespaceMode_NODE:
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UserNamespace))
case runtime.NamespaceMode_POD:
specOpts = append(specOpts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UserNamespace, Path: customopts.GetUserNamespace(sandboxPid)}))
// When re-vendoring vendor/github.com/containerd/containerd/oci/spec_opts.go,
// the following line would need to be updated to:
// specOpts = append(specOpts, oci.WithUserNamespace(uidMap, gidMap))
// See:
// https://github.com/containerd/containerd/commit/51a6813c06030ae2b3fcf9ec068e4b39cd2d1e69
specOpts = append(specOpts, oci.WithUserNamespace(
c.config.NodeWideUIDMapping.ContainerID,
c.config.NodeWideUIDMapping.HostID,
c.config.NodeWideUIDMapping.Size,
))
}

return runtimeSpec(id, specOpts...)
}

Expand Down
40 changes: 40 additions & 0 deletions pkg/server/container_create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,46 @@ func TestPidNamespace(t *testing.T) {
}
}

// TestUserNamespace checks that the generated container spec joins the
// sandbox's user namespace in POD mode and does not reference it in NODE mode.
func TestUserNamespace(t *testing.T) {
	testID := "test-id"
	testSandboxID := "sandbox-id"
	sandboxPid := uint32(1234)
	containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
	ociRuntime := config.Runtime{}
	c := newTestCRIService()

	// Both cases look for the same namespace entry; only its presence differs.
	sandboxUserNS := runtimespec.LinuxNamespace{
		Type: runtimespec.UserNamespace,
		Path: opts.GetUserNamespace(sandboxPid),
	}

	for desc, tc := range map[string]struct {
		userNS          runtime.NamespaceMode
		expectedMissing bool
	}{
		"node namespace mode": {
			userNS:          runtime.NamespaceMode_NODE,
			expectedMissing: true,
		},
		"pod namespace mode": {
			userNS: runtime.NamespaceMode_POD,
		},
	} {
		t.Logf("TestCase %q", desc)
		containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{User: tc.userNS}
		spec, err := c.generateContainerSpec(testID, testSandboxID, sandboxPid, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
		require.NoError(t, err)
		if tc.expectedMissing {
			assert.NotContains(t, spec.Linux.Namespaces, sandboxUserNS)
			continue
		}
		assert.Contains(t, spec.Linux.Namespaces, sandboxUserNS)
	}
}

func TestNoDefaultRunMount(t *testing.T) {
testID := "test-id"
testPid := uint32(1234)
Expand Down
Loading