[lxc-devel] [crio-lxc/master] Make crio-lxc working with kubernetes
Drachenfels-GmbH on Github
lxc-bot at linuxcontainers.org
Fri Jul 24 07:38:13 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 3044 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200724/00564149/attachment-0001.bin>
-------------- next part --------------
From c6397825ff9e24abff09016132a76f893d5ff510 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Mon, 6 Jul 2020 19:30:53 +0200
Subject: [PATCH 01/19] create: Poll for container status to fix container
startup.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/cmd/create.go b/cmd/create.go
index f0f20a5..78990e4 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -295,6 +295,19 @@ func makeSyncFifo(dir string) error {
return nil
}
+func waitContainer(c *lxc.Container, state lxc.State, timeout time.Duration) bool {
+ deadline := time.Now().Add(timeout)
+ // liblxc.Wait / go-libxc.Wait do not block when container is stopped. BUG in liblxc ?
+ // https://github.com/lxc/lxc/issues/2027
+ for time.Now().Before(deadline) {
+ if c.State() == state {
+ return true
+ }
+ time.Sleep(time.Millisecond * 50)
+ }
+ return false
+}
+
func startContainer(c *lxc.Container, spec *specs.Spec) error {
binary, err := os.Readlink("/proc/self/exe")
if err != nil {
@@ -317,8 +330,10 @@ func startContainer(c *lxc.Container, spec *specs.Spec) error {
cmdErr := cmd.Start()
+ log.Debugf("LXC container PID %d", c.InitPid())
+
if cmdErr == nil {
- if !c.Wait(lxc.RUNNING, 30*time.Second) {
+ if !waitContainer(c, lxc.RUNNING, 30*time.Second) {
cmdErr = fmt.Errorf("Container failed to initialize")
}
}
From 75db967708cb67b96bd782634e258fc5ee2336f2 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 8 Jul 2020 15:55:57 +0200
Subject: [PATCH 02/19] Add stub --systemd-cgroup cmdline flag to gracefully
handle default crio config.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/main.go | 4 ++++
test/crio.conf.in | 9 ++++++---
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/cmd/main.go b/cmd/main.go
index 36a02dc..0598913 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -44,6 +44,10 @@ func main() {
Usage: "set the lxc path to use",
Value: "/var/lib/lxc",
},
+ cli.BoolFlag{
+ Name: "systemd-cgroup",
+ Usage: "enable systemd cgroup (unimplemented stub for conmon)",
+ },
}
app.Before = func(ctx *cli.Context) error {
diff --git a/test/crio.conf.in b/test/crio.conf.in
index eb0c7f8..5331fe7 100644
--- a/test/crio.conf.in
+++ b/test/crio.conf.in
@@ -81,7 +81,7 @@ grpc_max_recv_msg_size = 16777216
# default_runtime is the _name_ of the OCI runtime to be used as the default.
# The name is matched against the runtimes map below.
-default_runtime = "runc"
+default_runtime = "lxc"
# If true, the runtime will not use pivot_root, but instead use MS_MOVE.
no_pivot = false
@@ -89,6 +89,9 @@ no_pivot = false
# Path to the conmon binary, used for monitoring the OCI runtime.
conmon = "PACKAGES_DIR/conmon/bin/conmon"
+# Cgroup setting for conmon
+#conmon_cgroup = "system.slice"
+
# Path to pinns.
pinns_path = "PACKAGES_DIR/cri-o/bin/pinns"
@@ -110,7 +113,7 @@ seccomp_profile = "CRIOLXC_TEST_DIR/seccomp.json"
apparmor_profile = "unconfined"
# Cgroup management implementation used for the runtime.
-cgroup_manager = "cgroupfs"
+cgroup_manager = "systemd"
# List of default capabilities for containers. If it is empty or commented out,
# only the capabilities defined in the containers json file by the user/kube
@@ -213,7 +216,7 @@ manage_network_ns_lifecycle = false
# If no runtime_handler is provided, the runtime will be picked based on the level
# of trust of the workload.
-[crio.runtime.runtimes.runc]
+[crio.runtime.runtimes.lxc]
runtime_path = "CRIOLXC_BINARY"
runtime_type = "oci"
runtime_root = "CRIOLXC_TEST_DIR/runtime-root"
From e1faa3938c15193603fee74d3990650412336ded Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 8 Jul 2020 15:57:16 +0200
Subject: [PATCH 03/19] Small documentation and test iprovements.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
.gitignore | 1 +
Makefile | 3 +++
README.md | 22 ++++++++++++++++++++++
test/helpers.bash | 3 +--
4 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
index f940ec2..4a7f0c8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ crio-lxc-test*
oci/
roots/
.stacker/
+.keeptempdirs
diff --git a/Makefile b/Makefile
index 1d8f2b2..a6598c3 100644
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,9 @@ check: crio-lxc
go test ./...
PACKAGES_DIR=$(PACKAGES_DIR) sudo -E "PATH=$$PATH" bats -t $(patsubst %,test/%.bats,$(TEST))
+%.bats: crio-lxc
+ PACKAGES_DIR=$(PACKAGES_DIR) sudo -E "PATH=$$PATH" bats -t test/$@
+
.PHONY: vendorup
vendorup:
go get -u
diff --git a/README.md b/README.md
index fbce9fd..53039d2 100644
--- a/README.md
+++ b/README.md
@@ -51,3 +51,25 @@ You'll also need conntrack installed:
```
apt install conntrack
```
+
+You have to install [bats](https://github.com/bats-core/bats-core) to run the tests.
+On debian you can install bats with:
+
+ apt-get install bats
+
+
+To keep the tempdir of of a test run, you have to create the lockfile `.keeptempdirs`
+in the top-level of this repository.
+
+ touch .keeptempdirs
+
+To debug `crictl` calls within a test run:
+
+ CRICTLDEBUG="-D" make basic.bats
+
+`bats` does not show any output when the test was successful.
+The logfile is created in /tmp and deleted when the test was successful.
+To watch the test output while the test is running:
+
+ tail -f /tmp/bats.*.log
+
diff --git a/test/helpers.bash b/test/helpers.bash
index 2bba1b2..d8800d1 100644
--- a/test/helpers.bash
+++ b/test/helpers.bash
@@ -53,8 +53,7 @@ function cleanup_tempdir {
function crictl {
# watch out for: https://github.com/kubernetes-sigs/cri-tools/issues/460
# If you need more debug output, set CRICTLDEBUG to -D
- CRICTLDEBUG=""
- $(which crictl) ${CRICTLDEBUG} --runtime-endpoint "$TEMP_DIR/crio.sock" $@
+ $(which crictl) ${CRICTLDEBUG} --runtime-endpoint "unix://$TEMP_DIR/crio.sock" $@
echo "$output"
}
From 8c116566e314b0940fff2af4c530142ef698aabc Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 8 Jul 2020 17:17:14 +0200
Subject: [PATCH 04/19] create: Fix lxc.mount.entry generation
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/cmd/create.go b/cmd/create.go
index 78990e4..8c4412a 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -227,8 +227,35 @@ func configureContainer(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) er
}
for _, ms := range spec.Mounts {
+ // ignore cgroup mount, lxc automouts this even with lxc.rootfs.managed = 0
+ // conf.c:mount_entry:1854 - Device or resource busy - Failed to mount "cgroup" on "/usr/lib/x86_64-linux-gnu/lxc/rootfs/sys/fs/cgroup"
+ if ms.Type == "cgroup" {
+ continue
+ }
+
+ // create target files and directories
+ info, err := os.Stat(ms.Source)
+ if err == nil {
+ if info.IsDir() {
+ ms.Options = append(ms.Options, "create=dir")
+ } else {
+ ms.Options = append(ms.Options, "create=file")
+ }
+ } else {
+ // This case catches all kind of virtual and remote filesystems (/dev/pts, /dev/shm, sysfs, procfs, dev ...)
+ // It can not be a file because the source file for a bind mount must exist.
+ if os.IsNotExist(err) {
+ ms.Options = append(ms.Options, "create=dir")
+ } else {
+ log.Debugf("failed to stat source %s of mountpoint %s: %s", ms.Source, ms.Destination, err)
+ }
+ }
+
opts := strings.Join(ms.Options, ",")
- mnt := fmt.Sprintf("%s %s %s %s", ms.Source, ms.Destination, ms.Type, opts)
+ // Make mount paths relative to container root https://github.com/lxc/lxc/issues/2276
+ dest := strings.TrimLeft(ms.Destination, "/")
+ mnt := fmt.Sprintf("%s %s %s %s", ms.Source, dest, ms.Type, opts)
+
if err := c.SetConfigItem("lxc.mount.entry", mnt); err != nil {
return errors.Wrap(err, "failed to set mount config")
}
From 65bd9bbd2287fd7bb937246f3b79616bc9912958 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 8 Jul 2020 17:20:02 +0200
Subject: [PATCH 05/19] main: Fix enable debug logging.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/main.go | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/cmd/main.go b/cmd/main.go
index 0598913..569bf4a 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -50,15 +50,17 @@ func main() {
},
}
+ log.SetLevel(log.InfoLevel)
+
app.Before = func(ctx *cli.Context) error {
LXC_PATH = ctx.String("lxc-path")
-
debug = ctx.Bool("debug")
+ if debug {
+ log.SetLevel(log.DebugLevel)
+ }
return nil
}
- log.SetLevel(log.InfoLevel)
-
if err := app.Run(os.Args); err != nil {
format := "error: %v\n"
if debug {
From e314dd29d719ad2563ad344cc6d36811fe130c4d Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 8 Jul 2020 17:21:21 +0200
Subject: [PATCH 06/19] Disable checkHackyPreStart that prevents proper
container termination.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/kill.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmd/kill.go b/cmd/kill.go
index 3af4ddf..a665048 100644
--- a/cmd/kill.go
+++ b/cmd/kill.go
@@ -93,7 +93,7 @@ func doKill(ctx *cli.Context) error {
}
- if c.Running() && checkHackyPreStart(c) == "started" {
+ if c.Running() { //&& checkHackyPreStart(c) == "started" {
pid := c.InitPid()
if err := unix.Kill(pid, signalMap[ctx.String("signal")]); err != nil {
From a713a4be7adae75c1d5f7f72027a139e34852351 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Fri, 10 Jul 2020 16:34:52 +0200
Subject: [PATCH 07/19] exec: Implement running a new command within a running
container.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/exec.go | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++
cmd/main.go | 1 +
2 files changed, 100 insertions(+)
create mode 100644 cmd/exec.go
diff --git a/cmd/exec.go b/cmd/exec.go
new file mode 100644
index 0000000..a13d00f
--- /dev/null
+++ b/cmd/exec.go
@@ -0,0 +1,99 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+
+ "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ "github.com/urfave/cli"
+
+ "golang.org/x/sys/unix"
+
+ lxc "gopkg.in/lxc/go-lxc.v2"
+)
+
+var execCmd = cli.Command{
+ Name: "exec",
+ Usage: "execute a new process in a running container",
+ ArgsUsage: "<containerID>",
+ Action: doExec,
+ Flags: []cli.Flag{
+ cli.StringFlag{
+ Name: "process, p",
+ Usage: "path to process json",
+ Value: "",
+ },
+ cli.StringFlag{
+ Name: "pid-file",
+ Usage: "file to write the process id to",
+ Value: "",
+ },
+ cli.BoolFlag{
+ Name: "detach, d",
+ Usage: "detach from the executed process",
+ },
+ },
+}
+
+// NOTE stdio (stdout/stderr) is not attached when adding unix.CLONE_NEWUSER
+const EXEC_NAMESPACES = unix.CLONE_NEWIPC | unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP | unix.CLONE_NEWPID
+
+func doExec(ctx *cli.Context) error {
+ containerID := ctx.Args().First()
+ if len(containerID) == 0 {
+ fmt.Fprintf(os.Stderr, "missing container ID\n")
+ cli.ShowCommandHelpAndExit(ctx, "exec", 1)
+ }
+
+ c, err := lxc.NewContainer(containerID, LXC_PATH)
+ if err != nil {
+ return errors.Wrap(err, "failed to create new container")
+ }
+ defer c.Release()
+
+ attachOpts := lxc.AttachOptions{
+ Namespaces: EXEC_NAMESPACES,
+ }
+
+ var procArgs []string
+ specFilePath := ctx.String("process")
+ specData, err := ioutil.ReadFile(specFilePath)
+ if err == nil {
+ // prefer the process spec file
+ var procSpec *specs.Process
+ err := json.Unmarshal(specData, &procSpec)
+ if err != nil {
+ return errors.Wrapf(err, "failed to read process spec from %s: %s", specFilePath, err)
+ }
+ // tanslate process spec to lxc.AttachOptions
+ procArgs = procSpec.Args
+ attachOpts.UID = int(procSpec.User.UID)
+ attachOpts.GID = int(procSpec.User.GID)
+ attachOpts.Cwd = procSpec.Cwd
+ attachOpts.Env = procSpec.Env
+ } else {
+ // fall back to cmdline arguments
+ if len(ctx.Args()) >= 2 {
+ procArgs = ctx.Args()[1:]
+ }
+ }
+
+ if ctx.Bool("detach") {
+ // FIXME detach is not called by conmon ! why ?
+ pid, err := c.RunCommandNoWait(procArgs, attachOpts)
+ pidFile := ctx.String("pid-file")
+ err = ioutil.WriteFile(pidFile, []byte(fmt.Sprintf("%s\n", pid)), 0640)
+ if err != nil {
+ return errors.Wrapf(err, "failed to write pid file %s: %s", pidFile)
+ }
+ } else {
+ exitStatus, err := c.RunCommandStatus(procArgs, attachOpts)
+ if err != nil {
+ return errors.Wrapf(err, "Cmd returned with exit code %d", exitStatus)
+ }
+ }
+ return nil
+}
diff --git a/cmd/main.go b/cmd/main.go
index 569bf4a..3e1c877 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -24,6 +24,7 @@ func main() {
startCmd,
killCmd,
deleteCmd,
+ execCmd,
}
app.Flags = []cli.Flag{
From 014464fa76f94ccf8ce03c09501da2c32bd3494c Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 15 Jul 2020 19:45:01 +0200
Subject: [PATCH 08/19] Ignore case for lxc --log-level flag
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/utils.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmd/utils.go b/cmd/utils.go
index aa3a45b..2b6e38e 100644
--- a/cmd/utils.go
+++ b/cmd/utils.go
@@ -32,7 +32,7 @@ func readBundleSpec(specFilePath string) (spec *specs.Spec, err error) {
func configureLogging(ctx *cli.Context, c *lxc.Container) error {
if ctx.GlobalIsSet("log-level") {
var logLevel lxc.LogLevel
- switch ctx.GlobalString("log-level") {
+ switch strings.ToLower(ctx.GlobalString("log-level")) {
case "trace":
logLevel = lxc.TRACE
case "debug":
From 16c045e580e201310919fda1041e207a1bfcc165 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 15 Jul 2020 19:46:31 +0200
Subject: [PATCH 09/19] Prepend cri-lxc log output to lxc log-file.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/main.go | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/cmd/main.go b/cmd/main.go
index 3e1c877..9cb398d 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -2,6 +2,7 @@ package main
import (
"fmt"
+ stdlog "log"
"os"
"github.com/apex/log"
@@ -47,18 +48,36 @@ func main() {
},
cli.BoolFlag{
Name: "systemd-cgroup",
- Usage: "enable systemd cgroup (unimplemented stub for conmon)",
+ Usage: "enable systemd cgroup",
},
}
log.SetLevel(log.InfoLevel)
+ var logFile *os.File
app.Before = func(ctx *cli.Context) error {
LXC_PATH = ctx.String("lxc-path")
debug = ctx.Bool("debug")
if debug {
log.SetLevel(log.DebugLevel)
}
+ logFilePath := ctx.String("log-file")
+ if logFilePath != "" {
+ f, err := os.OpenFile(logFilePath, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0640)
+ if err != nil {
+ log.Errorf("failed to open log file %s: %s", logFilePath, err)
+ } else {
+ logFile = f
+ stdlog.SetOutput(logFile)
+ }
+ }
+ return nil
+ }
+
+ app.After = func(ctx *cli.Context) error {
+ if logFile != nil {
+ return logFile.Close()
+ }
return nil
}
From 5284229f577a8c4db7149d9d2fdaec09c580cb94 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 15 Jul 2020 19:48:43 +0200
Subject: [PATCH 10/19] Set lxc process security attributes / apparmor profile
and cgroup root
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/cmd/create.go b/cmd/create.go
index 8c4412a..d8e01d7 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -202,6 +202,48 @@ func doCreate(ctx *cli.Context) error {
return nil
}
+func configureContainerSecurity(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) error {
+ // https://github.com/kubernetes/kubernetes/blob/a38a02792b55942177ee676a5e1993b18a8b4b0a/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto#L541
+ // // Privileged mode implies the following specific options are applied:
+ // 1. All capabilities are added.
+ // 2. Sensitive paths, such as kernel module paths within sysfs, are not masked.
+ // 3. Any sysfs and procfs mounts are mounted RW.
+ // 4. Apparmor confinement is not applied.
+ // 5. Seccomp restrictions are not applied.
+ // 6. The device cgroup does not restrict access to any devices.
+ // 7. All devices from the host's /dev are available within the container.
+ // 8. SELinux restrictions are not applied (e.g. label=disabled).
+ // security
+ // FIXME Kubelet does not set the 'io.kubernetes.cri-o.PrivilegedRuntime"
+ // https://github.com/containers/podman/blob/8704b78a6fbb953acb6b74d1671d5ad6456bf81f/pkg/annotations/annotations.go#L64
+
+ aaprofile := spec.Process.ApparmorProfile
+ if aaprofile == "" {
+ aaprofile = "generated"
+ }
+ if err := c.SetConfigItem("lxc.apparmor.profile", "generated"); err != nil {
+ //if err := c.SetConfigItem("lxc.apparmor.profile", "unconfined"); err != nil {
+ return errors.Wrapf(err, "faield to set apparmor.profile")
+ }
+ if aaprofile == "generated" {
+ // TODO Create apparmor profile from spec.Linux.Readonly and MaskedPaths
+ // set lxc.apparmor.raw
+ // see man apparmor.d
+ }
+
+ if err := c.SetConfigItem("lxc.proc.oom_score_adj", fmt.Sprintf("%d", *spec.Process.OOMScoreAdj)); err != nil {
+ return errors.Wrap(err, "failed to set lxc.proc.oom_score_adj")
+ }
+
+ if spec.Process.NoNewPrivileges {
+ if err := c.SetConfigItem("lxc.no_new_privs", "1"); err != nil {
+ return errors.Wrapf(err, "failed to set lxc.no_new_privs")
+ }
+ }
+
+ return nil
+}
+
func configureContainer(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) error {
if ctx.Bool("debug") {
c.SetVerbosity(lxc.Verbose)
@@ -296,6 +338,14 @@ func configureContainer(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) er
return errors.Wrap(err, "failed to configure namespaces")
}
+ if ctx.Bool("systemd-cgroup") {
+ c.SetConfigItem("lxc.cgroup.root", "system.slice")
+ }
+
+ if err := configureContainerSecurity(ctx, c, spec); err != nil {
+ return errors.Wrap(err, "failed to configure container security")
+ }
+
// capabilities?
// if !spec.Process.Terminal {
From f749ba85ab25d7183aba350e0ab6ea5fe08d1818 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 15 Jul 2020 19:49:48 +0200
Subject: [PATCH 11/19] Fix mount entry paths and symlink protection. WIP.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/cmd/create.go b/cmd/create.go
index d8e01d7..542d1e0 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -294,9 +294,26 @@ func configureContainer(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) er
}
opts := strings.Join(ms.Options, ",")
- // Make mount paths relative to container root https://github.com/lxc/lxc/issues/2276
- dest := strings.TrimLeft(ms.Destination, "/")
+
+ // Fix failing mounts due to symlink protection
+ // CVE-2015-1335: Protect container mounts against symlinks
+ // https://github.com/lxc/lxc/commit/592fd47a6245508b79fe6ac819fe6d3b2c1289be
+ // mount targets that contain symlinks must be resolved
+ dest := ms.Destination
+ if strings.HasPrefix(dest, "/var/run") {
+ dest = strings.TrimPrefix(dest, "/var")
+ }
+
+ // Either make mount destination paths relative to container or prepend the root path to them,
+ // otherwise lxc ignores the mount points.
+ // Hmmm, why does lxc not prepend the rootfs itself ?
+ // https://github.com/lxc/lxc/issues/2276
+ //dest := strings.TrimLeft(ms.Destination, "/")
+
+ dest = filepath.Join(spec.Root.Path, dest)
+
mnt := fmt.Sprintf("%s %s %s %s", ms.Source, dest, ms.Type, opts)
+ log.Debugf("adding mount entry %q", mnt)
if err := c.SetConfigItem("lxc.mount.entry", mnt); err != nil {
return errors.Wrap(err, "failed to set mount config")
From 6d4217f01a176298c0174e65c9aa6baa16dcc183 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 15 Jul 2020 19:59:09 +0200
Subject: [PATCH 12/19] Add debug wrapper for crio-lxc binary.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
bin/crio-lxc-debug.sh | 14 ++++++++++++++
1 file changed, 14 insertions(+)
create mode 100755 bin/crio-lxc-debug.sh
diff --git a/bin/crio-lxc-debug.sh b/bin/crio-lxc-debug.sh
new file mode 100755
index 0000000..5ad9e3b
--- /dev/null
+++ b/bin/crio-lxc-debug.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+# About: debug wrapper for crio-lxc
+
+LOG=/tmp/crio-lxc.log.$$
+NEWARGS="--debug --log-level DEBUG --log-file $LOG $@"
+
+env >> $LOG
+echo ARGS:$@ >> $LOG
+echo ---- >> $LOG
+cat /tmp/exec-process-* >> $LOG
+echo ---- >> $LOG
+cat $PWD/config.json >> $LOG
+echo ---- >> $LOG
+exec /usr/local/bin/crio-lxc $NEWARGS
From ce45e0a68d693233c98e14ba28eaafe573b94d26 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Thu, 16 Jul 2020 20:23:24 +0200
Subject: [PATCH 13/19] Fix mounts on /var/run when /var/run is a symlink to
/run.
Finally the kubernetes dashboard and deployments are working.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 40 +++++++++++++++++++++++++---------------
1 file changed, 25 insertions(+), 15 deletions(-)
diff --git a/cmd/create.go b/cmd/create.go
index 542d1e0..059c2bb 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -292,27 +292,37 @@ func configureContainer(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) er
log.Debugf("failed to stat source %s of mountpoint %s: %s", ms.Source, ms.Destination, err)
}
}
-
opts := strings.Join(ms.Options, ",")
- // Fix failing mounts due to symlink protection
- // CVE-2015-1335: Protect container mounts against symlinks
- // https://github.com/lxc/lxc/commit/592fd47a6245508b79fe6ac819fe6d3b2c1289be
- // mount targets that contain symlinks must be resolved
- dest := ms.Destination
- if strings.HasPrefix(dest, "/var/run") {
- dest = strings.TrimPrefix(dest, "/var")
+ mountDest := ms.Destination
+
+ // Resolve mount mountDestinatination paths containing symlinks.
+ // Symlinks in mount mountDestination paths are not allowed in LXC.
+ // See CVE-2015-1335: Protect container mounts against symlinks
+ // and https://github.com/lxc/lxc/commit/592fd47a6245508b79fe6ac819fe6d3b2c1289be
+ //
+ // So mount targets that contain symlinks must be resolved.
+ // e.g k8s service account tokens are mounted to /var/run/secrets/kubernetes.io/serviceaccount
+ // TODO Check whether a recursive and generic implementation is required!
+ if strings.HasPrefix(mountDest, "/var/run/") {
+ stat, err := os.Lstat(filepath.Join(spec.Root.Path, "/var/run/"))
+ if err == nil {
+ if stat.Mode() & os.ModeSymlink != 0 {
+ // resolve the symlink from /var/run to /run
+ mountDest = strings.TrimPrefix(mountDest, "/var")
+ }
+ }
}
- // Either make mount destination paths relative to container or prepend the root path to them,
- // otherwise lxc ignores the mount points.
- // Hmmm, why does lxc not prepend the rootfs itself ?
- // https://github.com/lxc/lxc/issues/2276
- //dest := strings.TrimLeft(ms.Destination, "/")
+ // Mount destinations must be either relative to the container root or absolute to
+ // the directory on the host containing the rootfs.
+ // LXC simply ignores relative mounts paths to an absolute rootfs.
- dest = filepath.Join(spec.Root.Path, dest)
+ // See https://github.com/lxc/lxc/issues/2276
+ // See man lxc.container.conf #MOUNT POINTS
+ mountDest = filepath.Join(spec.Root.Path, mountDest)
- mnt := fmt.Sprintf("%s %s %s %s", ms.Source, dest, ms.Type, opts)
+ mnt := fmt.Sprintf("%s %s %s %s", ms.Source, mountDest, ms.Type, opts)
log.Debugf("adding mount entry %q", mnt)
if err := c.SetConfigItem("lxc.mount.entry", mnt); err != nil {
From 83f6e0d14bd5303d9af0239bcd9303120e377404 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 22 Jul 2020 10:20:55 +0200
Subject: [PATCH 14/19] Fix spec.Process command execution.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 33 +++++++++++++++++++++++++++++----
go.mod | 1 +
go.sum | 2 ++
3 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/cmd/create.go b/cmd/create.go
index 059c2bb..34e6136 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -16,6 +16,7 @@ import (
"github.com/apex/log"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
+ "github.com/segmentio/ksuid"
"github.com/urfave/cli"
lxc "gopkg.in/lxc/go-lxc.v2"
@@ -90,14 +91,33 @@ const (
func emitFifoWaiter(file string) error {
fifoWaiter := fmt.Sprintf(`#!/bin/sh
-stat /syncfifo
echo "%s" | tee /syncfifo
-exec $@
+echo "Sourcing command file $1"
+echo "-----------------------"
+cat $1
+echo "-----------------------"
+. $1
`, SYNC_FIFO_CONTENT)
return ioutil.WriteFile(file, []byte(fifoWaiter), 0755)
}
+// Write the container init command to a file.
+// This file is then sourced by the file /syncfifo on container startup.
+// Every command argument is quoted so `exec` can process them properly.
+func emitCmdFile(cmdFile string, args ...string) error {
+ // https://stackoverflow.com/questions/33887194/how-to-set-multiple-commands-in-one-yaml-file-with-kubernetes
+ buf := strings.Builder{}
+ buf.WriteString("exec")
+ for _, arg := range args {
+ buf.WriteRune(' ')
+ buf.WriteRune('"')
+ buf.WriteString(arg)
+ buf.WriteRune('"')
+ }
+ return ioutil.WriteFile(cmdFile, []byte(buf.String()), 0640)
+}
+
func configureNamespaces(c *lxc.Container, spec *specs.Spec) error {
procPidPathRE := regexp.MustCompile(`/proc/(\d+)/ns`)
@@ -352,8 +372,13 @@ func configureContainer(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) er
return errors.Wrap(err, "failed to set hostname")
}
- argsString := "/fifo-wait " + strings.Join(spec.Process.Args, " ")
- if err := c.SetConfigItem("lxc.execute.cmd", argsString); err != nil {
+ cmd := fmt.Sprintf("/mycmd.%s", ksuid.New().String())
+ err = emitCmdFile(path.Join(spec.Root.Path, cmd), spec.Process.Args...)
+ if err != nil {
+ return errors.Wrapf(err, "could not write command file")
+ }
+
+ if err := c.SetConfigItem("lxc.execute.cmd", "/fifo-wait "+cmd); err != nil {
return errors.Wrap(err, "failed to set lxc.execute.cmd")
}
diff --git a/go.mod b/go.mod
index 7f80ccc..a8ec1d1 100644
--- a/go.mod
+++ b/go.mod
@@ -7,6 +7,7 @@ require (
github.com/onsi/ginkgo v1.8.0 // indirect
github.com/opencontainers/runtime-spec v1.0.1
github.com/pkg/errors v0.8.1
+ github.com/segmentio/ksuid v1.0.3
github.com/stretchr/objx v0.2.0 // indirect
github.com/urfave/cli v1.20.0
golang.org/x/crypto v0.0.0-20190621222207-cc06ce4a13d4 // indirect
diff --git a/go.sum b/go.sum
index cc4071c..b91cb46 100644
--- a/go.sum
+++ b/go.sum
@@ -33,6 +33,8 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/fastuuid v1.1.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
+github.com/segmentio/ksuid v1.0.3 h1:FoResxvleQwYiPAVKe1tMUlEirodZqlqglIuFsdDntY=
+github.com/segmentio/ksuid v1.0.3/go.mod h1:/XUiZBD3kVx5SmUOl55voK5yeAbBNNIed+2O73XgrPE=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/smartystreets/assertions v1.0.0/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM=
github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM=
From ca653b7fe24362468618b7da817e313b62d4ade4 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 22 Jul 2020 10:23:12 +0200
Subject: [PATCH 15/19] Fix container mount path destination for LXC.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 33 ++++++---------------------------
cmd/utils.go | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+), 27 deletions(-)
diff --git a/cmd/create.go b/cmd/create.go
index 34e6136..99e3c4a 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -305,7 +305,7 @@ func configureContainer(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) er
}
} else {
// This case catches all kind of virtual and remote filesystems (/dev/pts, /dev/shm, sysfs, procfs, dev ...)
- // It can not be a file because the source file for a bind mount must exist.
+ // It can never be a file because the source file for a bind mount must exist.
if os.IsNotExist(err) {
ms.Options = append(ms.Options, "create=dir")
} else {
@@ -314,34 +314,13 @@ func configureContainer(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) er
}
opts := strings.Join(ms.Options, ",")
- mountDest := ms.Destination
-
- // Resolve mount mountDestinatination paths containing symlinks.
- // Symlinks in mount mountDestination paths are not allowed in LXC.
- // See CVE-2015-1335: Protect container mounts against symlinks
- // and https://github.com/lxc/lxc/commit/592fd47a6245508b79fe6ac819fe6d3b2c1289be
- //
- // So mount targets that contain symlinks must be resolved.
- // e.g k8s service account tokens are mounted to /var/run/secrets/kubernetes.io/serviceaccount
- // TODO Check whether a recursive and generic implementation is required!
- if strings.HasPrefix(mountDest, "/var/run/") {
- stat, err := os.Lstat(filepath.Join(spec.Root.Path, "/var/run/"))
- if err == nil {
- if stat.Mode() & os.ModeSymlink != 0 {
- // resolve the symlink from /var/run to /run
- mountDest = strings.TrimPrefix(mountDest, "/var")
- }
- }
+ mountDest, err := resolveMountDestination(spec.Root.Path, ms.Destination)
+ if err != nil {
+ log.Debugf("resolveMountDestination: %s --> %s (err:%s)", ms.Destination, mountDest, err)
+ } else {
+ log.Debugf("resolveMountDestination: %s --> %s)", ms.Destination, mountDest)
}
- // Mount destinations must be either relative to the container root or absolute to
- // the directory on the host containing the rootfs.
- // LXC simply ignores relative mounts paths to an absolute rootfs.
-
- // See https://github.com/lxc/lxc/issues/2276
- // See man lxc.container.conf #MOUNT POINTS
- mountDest = filepath.Join(spec.Root.Path, mountDest)
-
mnt := fmt.Sprintf("%s %s %s %s", ms.Source, mountDest, ms.Type, opts)
log.Debugf("adding mount entry %q", mnt)
diff --git a/cmd/utils.go b/cmd/utils.go
index 2b6e38e..1241864 100644
--- a/cmd/utils.go
+++ b/cmd/utils.go
@@ -86,3 +86,54 @@ func RunCommand(args ...string) error {
}
return nil
}
+
+func resolveRootfsSymlinks(rootfs string, dst string) (string, error) {
+ stat, err := os.Lstat(dst)
+ if err != nil {
+ return dst, err
+ }
+ if stat.Mode()&os.ModeSymlink == 0 {
+ return dst, nil // not a symlink
+ }
+ target, err := os.Readlink(dst)
+ if err != nil {
+ return dst, err
+ }
+ if !strings.HasPrefix(target, rootfs) {
+ return filepath.Join(rootfs, target), nil
+ }
+ return target, nil
+}
+
+// resolveMountDestination resolves mount destination paths for LXC.
+//
+// Symlinks in mount mount destination paths are not allowed in LXC.
+// See CVE-2015-1335: Protect container mounts against symlinks
+// and https://github.com/lxc/lxc/commit/592fd47a6245508b79fe6ac819fe6d3b2c1289be
+// Mount targets that contain symlinks should be resolved relative to the container rootfs.
+// e.g k8s service account tokens are mounted to /var/run/secrets/kubernetes.io/serviceaccount
+// but /var/run is (mostly) a symlink to /run, so LXC denies to mount the serviceaccount token.
+//
+// The mount destination must be either relative to the container root or absolute to
+// the directory on the host containing the rootfs.
+// LXC simply ignores relative mounts paths to an absolute rootfs.
+// See man lxc.container.conf #MOUNT POINTS
+//
+// The mount option `create=dir` should be set when the error os.ErrNotExist is returned.
+// The non-existent directories are then automatically created by LXC.
+func resolveMountDestination(rootfs string, dst string) (string, error) {
+ dst = strings.TrimPrefix(dst, "/")
+ entries := strings.Split(dst, "/")
+ dstPath := rootfs
+ for i, entry := range entries {
+ dstPath = filepath.Join(dstPath, entry)
+ resolved, err := resolveRootfsSymlinks(rootfs, dstPath)
+ dstPath = resolved
+ if err != nil {
+ // The already resolved path is concatenated with the remaining path to be resolved,
+ // if resolution of path fails at some point.
+ return filepath.Join(dstPath, filepath.Join(entries[i+1:]...)), err
+ }
+ }
+ return dstPath, nil
+}
From ea7ad04bae7982fd270dc9057705632cc8ceeb3d Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 22 Jul 2020 10:25:25 +0200
Subject: [PATCH 16/19] Set apparmor profile from container spec.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 23 ++++-------------------
1 file changed, 4 insertions(+), 19 deletions(-)
diff --git a/cmd/create.go b/cmd/create.go
index 99e3c4a..a8c81bb 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -223,32 +223,17 @@ func doCreate(ctx *cli.Context) error {
}
func configureContainerSecurity(ctx *cli.Context, c *lxc.Container, spec *specs.Spec) error {
- // https://github.com/kubernetes/kubernetes/blob/a38a02792b55942177ee676a5e1993b18a8b4b0a/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto#L541
- // // Privileged mode implies the following specific options are applied:
- // 1. All capabilities are added.
- // 2. Sensitive paths, such as kernel module paths within sysfs, are not masked.
- // 3. Any sysfs and procfs mounts are mounted RW.
- // 4. Apparmor confinement is not applied.
- // 5. Seccomp restrictions are not applied.
- // 6. The device cgroup does not restrict access to any devices.
- // 7. All devices from the host's /dev are available within the container.
- // 8. SELinux restrictions are not applied (e.g. label=disabled).
- // security
- // FIXME Kubelet does not set the 'io.kubernetes.cri-o.PrivilegedRuntime"
- // https://github.com/containers/podman/blob/8704b78a6fbb953acb6b74d1671d5ad6456bf81f/pkg/annotations/annotations.go#L64
-
aaprofile := spec.Process.ApparmorProfile
if aaprofile == "" {
aaprofile = "generated"
}
- if err := c.SetConfigItem("lxc.apparmor.profile", "generated"); err != nil {
- //if err := c.SetConfigItem("lxc.apparmor.profile", "unconfined"); err != nil {
- return errors.Wrapf(err, "faield to set apparmor.profile")
+ if err := c.SetConfigItem("lxc.apparmor.profile", aaprofile); err != nil {
+ return errors.Wrapf(err, "failed to set apparmor.profile to %s", aaprofile)
}
if aaprofile == "generated" {
- // TODO Create apparmor profile from spec.Linux.Readonly and MaskedPaths
- // set lxc.apparmor.raw
+ // TODO Create apparmor profile from the spec (honoring Linux.Readonly and Linux.MaskedPaths)
// see man apparmor.d
+ // if err := c.SetConfigItem("lxc.apparmor.raw", aaprofile); err != nil {
}
if err := c.SetConfigItem("lxc.proc.oom_score_adj", fmt.Sprintf("%d", *spec.Process.OOMScoreAdj)); err != nil {
From 5dae906b2304217c29fdeaf33baaf248f2cb44e2 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 22 Jul 2020 10:26:22 +0200
Subject: [PATCH 17/19] Fix nil pointer when setting lxc.proc.oom_score_adj.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/cmd/create.go b/cmd/create.go
index a8c81bb..680ab65 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -236,8 +236,10 @@ func configureContainerSecurity(ctx *cli.Context, c *lxc.Container, spec *specs.
// if err := c.SetConfigItem("lxc.apparmor.raw", aaprofile); err != nil {
}
- if err := c.SetConfigItem("lxc.proc.oom_score_adj", fmt.Sprintf("%d", *spec.Process.OOMScoreAdj)); err != nil {
- return errors.Wrap(err, "failed to set lxc.proc.oom_score_adj")
+ if spec.Process.OOMScoreAdj != nil {
+ if err := c.SetConfigItem("lxc.proc.oom_score_adj", fmt.Sprintf("%d", *spec.Process.OOMScoreAdj)); err != nil {
+ return errors.Wrap(err, "failed to set lxc.proc.oom_score_adj")
+ }
}
if spec.Process.NoNewPrivileges {
From d1c25db70a4ed21f17201d338a199234c39367b4 Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 22 Jul 2020 10:30:49 +0200
Subject: [PATCH 18/19] Do not set lxc.ephemeral
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
cmd/create.go | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/cmd/create.go b/cmd/create.go
index 680ab65..9afc65d 100644
--- a/cmd/create.go
+++ b/cmd/create.go
@@ -248,6 +248,11 @@ func configureContainerSecurity(ctx *cli.Context, c *lxc.Container, spec *specs.
}
}
+ // crio deletes the working directory so lxc should not do this itself
+ //if err := c.SetConfigItem("lxc.ephemeral", "1"); err != nil {
+ // return errors.Wrapf(err, "failed to set lxc.ephemeral")
+ //}
+
return nil
}
From f7b8d584c22009b04d1b2a2f2138c4ef6cfdcefd Mon Sep 17 00:00:00 2001
From: Ruben Jenster <r.jenster at drachenfels.de>
Date: Wed, 22 Jul 2020 10:31:21 +0200
Subject: [PATCH 19/19] Update Makefile test target.
Signed-off-by: Ruben Jenster <r.jenster at drachenfels.de>
---
Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index a6598c3..816a148 100644
--- a/Makefile
+++ b/Makefile
@@ -17,8 +17,8 @@ check: crio-lxc
go test ./...
PACKAGES_DIR=$(PACKAGES_DIR) sudo -E "PATH=$$PATH" bats -t $(patsubst %,test/%.bats,$(TEST))
-%.bats: crio-lxc
- PACKAGES_DIR=$(PACKAGES_DIR) sudo -E "PATH=$$PATH" bats -t test/$@
+test/*.bats: crio-lxc
+ PACKAGES_DIR=$(PACKAGES_DIR) sudo -E "PATH=$$PATH" bats -t $@
.PHONY: vendorup
vendorup:
More information about the lxc-devel
mailing list