[lxc-devel] [lxd/master] daemon: check whether shiftfs is useable

brauner on Github lxc-bot at linuxcontainers.org
Mon Aug 3 22:38:59 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 756 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200803/d6d363ab/attachment.bin>
-------------- next part --------------
From 3003758417555f58062eeb75e4494623a31e2f8a Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Tue, 4 Aug 2020 00:34:39 +0200
Subject: [PATCH] daemon: check whether shiftfs is useable

In nested workloads where the outer LXD hasn't used shiftfs, the container's
rootfs won't be marked for shifting, and since the inner container hasn't
mounted the rootfs itself, its user namespace doesn't own the superblock of the
rootfs filesystem. Since we require CAP_SYS_ADMIN in the owning user namespace
of the superblock, shiftfs isn't useable for the inner LXD. Handle that case.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/daemon.go            |  2 +-
 lxd/main_checkfeature.go | 42 ++++++++++++++++++++++++++++++++++++++++
 lxd/main_forksyscall.go  | 22 +--------------------
 lxd/main_nsexec.go       |  2 +-
 4 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/lxd/daemon.go b/lxd/daemon.go
index 9e14c168a5..dde10b17e4 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -694,7 +694,7 @@ func (d *Daemon) init() error {
 	if shared.IsTrue(os.Getenv("LXD_SHIFTFS_DISABLE")) {
 		logger.Infof(" - shiftfs support: disabled")
 	} else {
-		if util.HasFilesystem("shiftfs") || util.LoadModule("shiftfs") == nil {
+		if canUseShiftfs() && (util.HasFilesystem("shiftfs") || util.LoadModule("shiftfs") == nil) {
 			d.os.Shiftfs = true
 			logger.Infof(" - shiftfs support: yes")
 		} else {
diff --git a/lxd/main_checkfeature.go b/lxd/main_checkfeature.go
index b2576b6296..ffabdfe344 100644
--- a/lxd/main_checkfeature.go
+++ b/lxd/main_checkfeature.go
@@ -42,10 +42,12 @@ __ro_after_init bool netnsid_aware = false;
 __ro_after_init bool pidfd_aware = false;
 __ro_after_init bool uevent_aware = false;
 __ro_after_init int seccomp_notify_aware = 0;
+__ro_after_init bool shiftfs_useable = false;
 __ro_after_init char errbuf[4096];
 
 extern int can_inject_uevent(const char *uevent, size_t len);
 extern int wait_for_pid(pid_t pid);
+extern int preserve_ns(pid_t pid, int ns_fd, const char *ns);
 
 static int netns_set_nsid(int fd)
 {
@@ -326,6 +328,37 @@ static void is_pidfd_aware(void)
 	pidfd_aware = true;
 }
 
+static void is_shiftfs_useable(void)
+{
+	int hostmntns_fd;
+
+	hostmntns_fd = preserve_ns(getpid(), -EBADF, "mnt");
+	if (hostmntns_fd < 0) {
+		(void)sprintf(errbuf, "%m - %s", "Failed to preserve host mount namespace");
+		return;
+	}
+
+	if (unshare(CLONE_NEWNS) < 0) {
+		(void)sprintf(errbuf, "%m - %s", "Failed to unshare mount namespace");
+		return;
+	}
+
+	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0) < 0) {
+		(void)sprintf(errbuf, "%m - %s", "Failed to turn \"/\" into private mount");
+		goto on_error;
+	}
+
+	if (mount("/", "/", "shiftfs", 0, "mark") == 0)
+		shiftfs_useable = true;
+	else
+		(void)sprintf(errbuf, "%m - %s", "Failed to mount shiftfs");
+	umount2("/", MNT_DETACH);
+
+on_error:
+	if (setns(hostmntns_fd, CLONE_NEWNS) < 0)
+		(void)sprintf(errbuf, "%m - %s", "Failed to attach to host mount namespace");
+}
+
 void checkfeature(void)
 {
 	__do_close int hostnetns_fd = -EBADF, newnetns_fd = -EBADF;
@@ -334,6 +367,7 @@ void checkfeature(void)
 	is_pidfd_aware();
 	is_uevent_aware();
 	is_seccomp_notify_aware();
+	is_shiftfs_useable();
 
 	if (setns(hostnetns_fd, CLONE_NEWNET) < 0)
 		(void)sprintf(errbuf, "%s", "Failed to attach to host network namespace");
@@ -370,3 +404,11 @@ func canUseSeccompListenerContinue() bool {
 func canUsePidFds() bool {
 	return bool(C.pidfd_aware)
 }
+
+func canUseShiftfs() bool {
+	if !bool(C.shiftfs_useable) {
+		logger.Debugf("%s", C.GoString(&C.errbuf[0]))
+	}
+
+	return bool(C.shiftfs_useable)
+}
diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index aa76fe36f0..bb148fa7d2 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -36,6 +36,7 @@ extern char* advance_arg(bool required);
 extern void attach_userns_fd(int ns_fd);
 extern int pidfd_nsfd(int pidfd, pid_t pid);
 extern bool setnsat(int ns_fd, const char *ns);
+extern int preserve_ns(const int pid, const char *ns);
 
 static bool chdirchroot_in_mntns(int cwd_fd, int root_fd)
 {
@@ -337,27 +338,6 @@ static int make_tmpfile(char *template, bool dir)
 	return 0;
 }
 
-static int preserve_ns(const int pid, const char *ns)
-{
-	int ret;
-// 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0
-#define __NS_PATH_LEN 50
-	char path[__NS_PATH_LEN];
-
-	// This way we can use this function to also check whether namespaces
-	// are supported by the kernel by passing in the NULL or the empty
-	// string.
-	ret = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid,
-		       !ns || strcmp(ns, "") == 0 ? "" : "/",
-		       !ns || strcmp(ns, "") == 0 ? "" : ns);
-	if (ret < 0 || (size_t)ret >= __NS_PATH_LEN) {
-		errno = EFBIG;
-		return -1;
-	}
-
-	return open(path, O_RDONLY | O_CLOEXEC);
-}
-
 static void mount_emulate(void)
 {
 	__do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF;
diff --git a/lxd/main_nsexec.go b/lxd/main_nsexec.go
index 5a652722e2..bbd148ba5a 100644
--- a/lxd/main_nsexec.go
+++ b/lxd/main_nsexec.go
@@ -144,7 +144,7 @@ int dosetns_file(char *file, char *nstype) {
 	return 0;
 }
 
-static int preserve_ns(pid_t pid, int ns_fd, const char *ns)
+int preserve_ns(pid_t pid, int ns_fd, const char *ns)
 {
 	int ret;
 	if (ns_fd >= 0)


More information about the lxc-devel mailing list