[lxc-devel] [lxd/master] seccomp: improve syscall interception

brauner on Github lxc-bot at linuxcontainers.org
Mon Jul 22 13:03:39 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190722/8d896ba2/attachment-0001.bin>
-------------- next part --------------
From 80b0580ac020a55b18dfe316effe66540792ff7b Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 22 Jul 2019 10:36:35 +0200
Subject: [PATCH 1/8] forksyscall: fix variable declarations

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_forksyscall.go | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index 836d7cb0b9..ec67ade370 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -68,18 +68,17 @@ static bool chdirchroot(pid_t pid)
 static void forkmknod()
 {
 	__do_close_prot_errno int cwd_fd = -EBADF, host_target_fd = -EBADF, mnt_fd = -EBADF;
-	int ret;
 	char *cur = NULL, *target = NULL, *target_dir = NULL, *target_host = NULL;
+	int chk_perm_only, ret;
 	char path[PATH_MAX];
-	mode_t mode = 0;
-	dev_t dev = 0;
-	pid_t pid = 0;
-	uid_t fsuid = -1, uid = -1;
-	gid_t fsgid = -1, gid = -1;
+	mode_t mode;
+	dev_t dev;
+	pid_t pid;
+	uid_t fsuid, uid;
+	gid_t fsgid, gid;
 	struct stat s1, s2;
 	struct statfs sfs1, sfs2;
 	cap_t caps;
-	int chk_perm_only;
 
 	pid = atoi(advance_arg(true));
 	target = advance_arg(true);

From 94c6139152a0757465331a9e9f2766e780384692 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 22 Jul 2019 11:19:51 +0200
Subject: [PATCH 2/8] forksyscall: don't break chdirchroot() with
 setns(CLONE_NEWNS)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stéphane observed unexpected behavior for the forkmknod() syscall
handler. The reason this failed was the setns(CLONE_NEWNS) breaks the
chdir() and chroot() we performed before.

Reported-by: Stéphane Graber <stgraber at ubuntu.com>
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_forksyscall.go | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index ec67ade370..e89d46fd98 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -67,7 +67,7 @@ static bool chdirchroot(pid_t pid)
 // <PID> <root-uid> <root-gid> <path> <mode> <dev>
 static void forkmknod()
 {
-	__do_close_prot_errno int cwd_fd = -EBADF, host_target_fd = -EBADF, mnt_fd = -EBADF;
+	__do_close_prot_errno int cwd_fd = -EBADF, host_target_fd = -EBADF;
 	char *cur = NULL, *target = NULL, *target_dir = NULL, *target_host = NULL;
 	int chk_perm_only, ret;
 	char path[PATH_MAX];
@@ -91,40 +91,21 @@ static void forkmknod()
 	fsgid = atoi(advance_arg(true));
 	chk_perm_only = atoi(advance_arg(true));
 
-	if (*target == '/') {
-		// user has specified an absolute path
-		snprintf(path, sizeof(path), "%s", target);
-		target_dir = dirname(path);
-	} else {
-		// user has specified a relative path
-		snprintf(path, sizeof(path), "/proc/%d/cwd", pid);
-		target_dir = path;
-	}
-	cwd_fd = open(path, O_PATH | O_RDONLY | O_CLOEXEC);
-	if (cwd_fd < 0) {
-		fprintf(stderr, "%d", ENOANO);
-		_exit(EXIT_FAILURE);
-	}
-
 	host_target_fd = open(dirname(target_host), O_PATH | O_RDONLY | O_CLOEXEC);
 	if (host_target_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
 
-	snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid);
-	mnt_fd = open(path, O_RDONLY | O_CLOEXEC);
-	if (mnt_fd < 0) {
-		fprintf(stderr, "%d", ENOANO);
-		_exit(EXIT_FAILURE);
-	}
-
 	if (!chdirchroot(pid)) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
 
-	if (setns(mnt_fd, CLONE_NEWNS)) {
+	snprintf(path, sizeof(path), "%s", target);
+	target_dir = dirname(path);
+	cwd_fd = open(target_dir, O_PATH | O_RDONLY | O_CLOEXEC);
+	if (cwd_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}

From df12569f40f361522830b99a60d039f353c4a123 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 22 Jul 2019 13:00:48 +0200
Subject: [PATCH 3/8] forksyscall: re-introduce setns(CLONE_NEWNS) properly

We need to attach to the mount namespace to correctly see mounted
filesystems.
This needs some trickery because there's not fchroot() like there is on
some BSDs.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_forksyscall.go | 93 +++++++++++++++++++++++++++++++----------
 1 file changed, 71 insertions(+), 22 deletions(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index e89d46fd98..183b206e13 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -48,16 +48,20 @@ static int fstat_fstatfs(int fd, struct stat *s, struct statfs *sfs)
 	return 0;
 }
 
-static bool chdirchroot(pid_t pid)
+static bool chdirchroot_in_mntns(int cwd_fd, int root_fd)
 {
-	char path[PATH_MAX];
+	ssize_t len;
+	char buf[PATH_MAX];
 
-	snprintf(path, sizeof(path), "/proc/%d/cwd", pid);
-	if (chdir(path))
+	if (fchdir(cwd_fd))
 		return false;
 
-	snprintf(path, sizeof(path), "/proc/%d/root", pid);
-	if (chroot(path))
+	len = readlinkat(root_fd, "", buf, sizeof(buf));
+	if (len < 0 || len >= sizeof(buf))
+		return false;
+	buf[len] = '\0';
+
+	if (chroot(buf))
 		return false;
 
 	return true;
@@ -67,7 +71,7 @@ static bool chdirchroot(pid_t pid)
 // <PID> <root-uid> <root-gid> <path> <mode> <dev>
 static void forkmknod()
 {
-	__do_close_prot_errno int cwd_fd = -EBADF, host_target_fd = -EBADF;
+	__do_close_prot_errno int cwd_fd = -EBADF, host_target_fd = -EBADF, mnt_fd = -EBADF, root_fd = -EBADF, target_dir_fd = -EBADF;
 	char *cur = NULL, *target = NULL, *target_dir = NULL, *target_host = NULL;
 	int chk_perm_only, ret;
 	char path[PATH_MAX];
@@ -97,15 +101,41 @@ static void forkmknod()
 		_exit(EXIT_FAILURE);
 	}
 
-	if (!chdirchroot(pid)) {
+	snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid);
+	mnt_fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (mnt_fd < 0) {
+		fprintf(stderr, "%d", ENOANO);
+		_exit(EXIT_FAILURE);
+	}
+
+	snprintf(path, sizeof(path), "/proc/%d/root", pid);
+	root_fd = open(path, O_PATH | O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
+	if (root_fd < 0) {
+		fprintf(stderr, "%d", ENOANO);
+		_exit(EXIT_FAILURE);
+	}
+
+	snprintf(path, sizeof(path), "/proc/%d/cwd", pid);
+	cwd_fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (cwd_fd < 0) {
+		fprintf(stderr, "%d", ENOANO);
+		_exit(EXIT_FAILURE);
+	}
+
+	if (setns(mnt_fd, CLONE_NEWNS)) {
+		fprintf(stderr, "%d", ENOANO);
+		_exit(EXIT_FAILURE);
+	}
+
+	if (!chdirchroot_in_mntns(cwd_fd, root_fd)) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
 
 	snprintf(path, sizeof(path), "%s", target);
 	target_dir = dirname(path);
-	cwd_fd = open(target_dir, O_PATH | O_RDONLY | O_CLOEXEC);
-	if (cwd_fd < 0) {
+	target_dir_fd = open(target_dir, O_PATH | O_RDONLY | O_CLOEXEC);
+	if (target_dir_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
@@ -144,7 +174,7 @@ static void forkmknod()
 		_exit(EXIT_FAILURE);
 	}
 
-	ret = fstat_fstatfs(cwd_fd, &s2, &sfs2);
+	ret = fstat_fstatfs(target_dir_fd, &s2, &sfs2);
 	if (ret) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
@@ -173,7 +203,7 @@ static void forkmknod()
 
 	// basename() can modify its argument so accessing target_host is
 	// invalid from now on.
-	ret = mknodat(cwd_fd, target, mode, dev);
+	ret = mknodat(target_dir_fd, target, mode, dev);
 	if (ret) {
 		fprintf(stderr, "%d", errno);
 		_exit(EXIT_FAILURE);
@@ -184,7 +214,7 @@ static void forkmknod()
 #define CLONE_NEWCGROUP	0x02000000
 #endif
 
-const char *ns_names[] = { "user", "mnt", "pid", "uts", "ipc", "net", "cgroup", NULL };
+const char *ns_names[] = { "user", "pid", "uts", "ipc", "net", "cgroup", NULL };
 
 static bool setnsat(int ns_fd, const char *ns)
 {
@@ -227,10 +257,10 @@ static bool change_creds(int ns_fd, cap_t caps, uid_t nsuid, gid_t nsgid, uid_t
 
 static void forksetxattr()
 {
-	__do_close_prot_errno int ns_fd = -EBADF, target_fd = -EBADF;
+	__do_close_prot_errno int cwd_fd = -EBADF, mnt_fd = -EBADF, ns_fd = -EBADF, root_fd = -EBADF, target_fd = -EBADF;
 	int flags = 0;
-	char *name, *path;
-	char ns_path[PATH_MAX];
+	char *name, *target;
+	char path[PATH_MAX];
 	uid_t nsfsuid, nsuid;
 	gid_t nsfsgid, nsgid;
 	pid_t pid = 0;
@@ -246,25 +276,44 @@ static void forksetxattr()
 	nsfsuid = atoi(advance_arg(true));
 	nsfsgid = atoi(advance_arg(true));
 	name = advance_arg(true);
-	path = advance_arg(true);
+	target = advance_arg(true);
 	flags = atoi(advance_arg(true));
 	whiteout = atoi(advance_arg(true));
 	size = atoi(advance_arg(true));
 	data = advance_arg(true);
 
-	snprintf(ns_path, sizeof(ns_path), "/proc/%d/ns", pid);
-	ns_fd = open(ns_path, O_RDONLY | O_CLOEXEC);
+	snprintf(path, sizeof(path), "/proc/%d/ns", pid);
+	ns_fd = open(path, O_RDONLY | O_CLOEXEC);
 	if (ns_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
 
-	if (!chdirchroot(pid)) {
-		fprintf(stderr, "%d", errno);
+	snprintf(path, sizeof(path), "/proc/%d/root", pid);
+	root_fd = open(path, O_PATH| O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
+	if (root_fd < 0) {
+		fprintf(stderr, "%d", ENOANO);
+		_exit(EXIT_FAILURE);
+	}
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid);
+	mnt_fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (mnt_fd < 0) {
+		fprintf(stderr, "%d", ENOANO);
+		_exit(EXIT_FAILURE);
+	}
+
+	if (setns(mnt_fd, CLONE_NEWNS)) {
+		fprintf(stderr, "%d", ENOANO);
+		_exit(EXIT_FAILURE);
+	}
+
+	if (!chdirchroot_in_mntns(cwd_fd, root_fd)) {
+		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
 
-	target_fd = open(path, O_RDONLY | O_CLOEXEC);
+	target_fd = open(target, O_RDONLY | O_CLOEXEC);
 	if (target_fd < 0) {
 		fprintf(stderr, "%d", errno);
 		_exit(EXIT_FAILURE);

From cdf94832a21f5fec2639189b379a3679f50f7ef7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 22 Jul 2019 13:36:57 +0200
Subject: [PATCH 4/8] forksyscall: remove st_ino check from same_fsinfo()

When on shiftfs the inode number will never be identical to the one used
on the host. This causes us to unnecessarily error out prematurely and
use mount injection.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_forksyscall.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index 183b206e13..32cb8b6908 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -34,7 +34,7 @@ extern int dosetns(int pid, char *nstype);
 static inline bool same_fsinfo(struct stat *s1, struct stat *s2,
 			       struct statfs *sfs1, struct statfs *sfs2)
 {
-	return ((sfs1->f_type == sfs2->f_type) && (s1->st_dev == s2->st_dev) && (s1->st_ino == s2->st_ino));
+	return ((sfs1->f_type == sfs2->f_type) && (s1->st_dev == s2->st_dev));
 }
 
 static int fstat_fstatfs(int fd, struct stat *s, struct statfs *sfs)

From 3c4e2c2351c957dfb7cc7a001b5570c69e08dfaf Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 22 Jul 2019 13:40:25 +0200
Subject: [PATCH 5/8] seccomp: remove shiftfs special-casing

We shouldn't only check permissions for shiftfs. Instead, we should try
whether the mknod call succeeds and retry with mount injection when it
fails expectedly.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_forksyscall.go | 14 ++++++--------
 lxd/seccomp.go          | 32 +++++++-------------------------
 2 files changed, 13 insertions(+), 33 deletions(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index 32cb8b6908..ff6f107063 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -73,7 +73,7 @@ static void forkmknod()
 {
 	__do_close_prot_errno int cwd_fd = -EBADF, host_target_fd = -EBADF, mnt_fd = -EBADF, root_fd = -EBADF, target_dir_fd = -EBADF;
 	char *cur = NULL, *target = NULL, *target_dir = NULL, *target_host = NULL;
-	int chk_perm_only, ret;
+	int ret;
 	char path[PATH_MAX];
 	mode_t mode;
 	dev_t dev;
@@ -93,7 +93,6 @@ static void forkmknod()
 	gid = atoi(advance_arg(true));
 	fsuid = atoi(advance_arg(true));
 	fsgid = atoi(advance_arg(true));
-	chk_perm_only = atoi(advance_arg(true));
 
 	host_target_fd = open(dirname(target_host), O_PATH | O_RDONLY | O_CLOEXEC);
 	if (host_target_fd < 0) {
@@ -196,16 +195,15 @@ static void forkmknod()
 		_exit(EXIT_FAILURE);
 	}
 
-	if (chk_perm_only) {
-		fprintf(stderr, "%d", ENOMEDIUM);
-		_exit(EXIT_FAILURE);
-	}
-
 	// basename() can modify its argument so accessing target_host is
 	// invalid from now on.
 	ret = mknodat(target_dir_fd, target, mode, dev);
 	if (ret) {
-		fprintf(stderr, "%d", errno);
+		if (errno == EPERM)
+			fprintf(stderr, "%d", ENOMEDIUM);
+		else
+			fprintf(stderr, "%d", errno);
+
 		_exit(EXIT_FAILURE);
 	}
 }
diff --git a/lxd/seccomp.go b/lxd/seccomp.go
index fb904a0368..e26373ebae 100644
--- a/lxd/seccomp.go
+++ b/lxd/seccomp.go
@@ -863,7 +863,7 @@ func taskIds(pid int) (error, int64, int64, int64, int64) {
 	return nil, uid, gid, fsuid, fsgid
 }
 
-func CallForkmknod(c container, dev types.Device, requestPID int, permissionsOnly int) int {
+func CallForkmknod(c container, dev types.Device, requestPID int) int {
 	rootLink := fmt.Sprintf("/proc/%d/root", requestPID)
 	rootPath, err := os.Readlink(rootLink)
 	if err != nil {
@@ -892,8 +892,7 @@ func CallForkmknod(c container, dev types.Device, requestPID int, permissionsOnl
 		"forksyscall", "mknod", dev["pid"], dev["path"],
 		dev["mode_t"], dev["dev_t"], dev["hostpath"],
 		fmt.Sprintf("%d", uid), fmt.Sprintf("%d", gid),
-		fmt.Sprintf("%d", fsuid), fmt.Sprintf("%d", fsgid),
-		fmt.Sprintf("%d", permissionsOnly))
+		fmt.Sprintf("%d", fsuid), fmt.Sprintf("%d", fsgid))
 	if err != nil {
 		errno, err := strconv.Atoi(stderr)
 		if err != nil || errno == C.ENOANO {
@@ -922,11 +921,6 @@ type MknodArgs struct {
 }
 
 func (s *SeccompServer) doDeviceSyscall(c container, args *MknodArgs, siov *SeccompIovec) int {
-	diskIdmap, err := c.DiskIdmap()
-	if err != nil {
-		return int(-C.EPERM)
-	}
-
 	dev := types.Device{}
 	dev["type"] = "unix-char"
 	dev["mode"] = fmt.Sprintf("%#o", args.cMode)
@@ -937,26 +931,12 @@ func (s *SeccompServer) doDeviceSyscall(c container, args *MknodArgs, siov *Secc
 	dev["mode_t"] = fmt.Sprintf("%d", args.cMode)
 	dev["dev_t"] = fmt.Sprintf("%d", args.cDev)
 
-	if s.d.os.Shiftfs && !c.IsPrivileged() && diskIdmap == nil {
-		errno := CallForkmknod(c, dev, int(args.cPid), 1)
-		if errno != int(-C.ENOMEDIUM) {
-			return errno
-		}
-
-		err = c.InsertSeccompUnixDevice(fmt.Sprintf("forkmknod.unix.%d", int(args.cPid)), dev, int(args.cPid))
-		if err != nil {
-			return int(-C.EPERM)
-		}
-
-		return 0
-	}
-
-	errno := CallForkmknod(c, dev, int(args.cPid), 0)
+	errno := CallForkmknod(c, dev, int(args.cPid))
 	if errno != int(-C.ENOMEDIUM) {
 		return errno
 	}
 
-	err = c.InsertSeccompUnixDevice(fmt.Sprintf("forkmknod.unix.%d", int(args.cPid)), dev, int(args.cPid))
+	err := c.InsertSeccompUnixDevice(fmt.Sprintf("forkmknod.unix.%d", int(args.cPid)), dev, int(args.cPid))
 	if err != nil {
 		return int(-C.EPERM)
 	}
@@ -1007,7 +987,9 @@ func (s *SeccompServer) HandleMknodatSyscall(c container, siov *SeccompIovec) in
 			"seccomp_notify_flags": siov.req.flags,
 		})
 
-	if int(siov.req.data.args[0]) != int(C.AT_FDCWD) {
+	// Make sure to handle 64bit kernel, 32bit container/userspace, LXD
+	// built on 64bit userspace correctly.
+	if int32(siov.req.data.args[0]) != int32(C.AT_FDCWD) {
 		logger.Debugf("Non AT_FDCWD mknodat calls are not allowed")
 		return int(-C.EINVAL)
 	}

From 19aa048a33f747ffe98c54f40d251ff773df4b3e Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 22 Jul 2019 14:29:44 +0200
Subject: [PATCH 6/8] forksyscall: harden open()-calls via O_PATH and
 O_DIRECTORY

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_forksyscall.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index ff6f107063..141477ff8d 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -94,7 +94,7 @@ static void forkmknod()
 	fsuid = atoi(advance_arg(true));
 	fsgid = atoi(advance_arg(true));
 
-	host_target_fd = open(dirname(target_host), O_PATH | O_RDONLY | O_CLOEXEC);
+	host_target_fd = open(dirname(target_host), O_PATH | O_RDONLY | O_CLOEXEC | O_DIRECTORY);
 	if (host_target_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
@@ -115,7 +115,7 @@ static void forkmknod()
 	}
 
 	snprintf(path, sizeof(path), "/proc/%d/cwd", pid);
-	cwd_fd = open(path, O_RDONLY | O_CLOEXEC);
+	cwd_fd = open(path, O_PATH | O_RDONLY | O_CLOEXEC);
 	if (cwd_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
@@ -133,7 +133,7 @@ static void forkmknod()
 
 	snprintf(path, sizeof(path), "%s", target);
 	target_dir = dirname(path);
-	target_dir_fd = open(target_dir, O_PATH | O_RDONLY | O_CLOEXEC);
+	target_dir_fd = open(target_dir, O_PATH | O_RDONLY | O_CLOEXEC | O_DIRECTORY);
 	if (target_dir_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
@@ -281,14 +281,14 @@ static void forksetxattr()
 	data = advance_arg(true);
 
 	snprintf(path, sizeof(path), "/proc/%d/ns", pid);
-	ns_fd = open(path, O_RDONLY | O_CLOEXEC);
+	ns_fd = open(path, O_PATH | O_RDONLY | O_CLOEXEC | O_DIRECTORY);
 	if (ns_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
 
 	snprintf(path, sizeof(path), "/proc/%d/root", pid);
-	root_fd = open(path, O_PATH| O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
+	root_fd = open(path, O_PATH | O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
 	if (root_fd < 0) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);

From 1c1a570493fcd41f107aeb7537d307dbf90b9caf Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 22 Jul 2019 14:56:02 +0200
Subject: [PATCH 7/8] forksyscall: remove same_fsinfo() logic completely

The same_fsinfo() logic was needed when we didn't attach to the mount
namespace of the calling task.
Now it is actually becoming a problem. Think of the scenario where you
attach a storage volume to a container at /var/lib/. Now we want to
verify that the taret directory in which the device is supposed to be
created is on the same filesystem on the host and in the container. But
we won't be able to open the target directory from the host since it
only exists in the container's mount namespace because we attached a
mount. That means this logic is broken. Wipe it. It should be secure
since I changed the logic to always attach to the mount namespace.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_forksyscall.go | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index 141477ff8d..1f03cc1294 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -71,7 +71,7 @@ static bool chdirchroot_in_mntns(int cwd_fd, int root_fd)
 // <PID> <root-uid> <root-gid> <path> <mode> <dev>
 static void forkmknod()
 {
-	__do_close_prot_errno int cwd_fd = -EBADF, host_target_fd = -EBADF, mnt_fd = -EBADF, root_fd = -EBADF, target_dir_fd = -EBADF;
+	__do_close_prot_errno int cwd_fd = -EBADF, mnt_fd = -EBADF, root_fd = -EBADF, target_dir_fd = -EBADF;
 	char *cur = NULL, *target = NULL, *target_dir = NULL, *target_host = NULL;
 	int ret;
 	char path[PATH_MAX];
@@ -80,8 +80,7 @@ static void forkmknod()
 	pid_t pid;
 	uid_t fsuid, uid;
 	gid_t fsgid, gid;
-	struct stat s1, s2;
-	struct statfs sfs1, sfs2;
+	struct statfs sfs;
 	cap_t caps;
 
 	pid = atoi(advance_arg(true));
@@ -94,12 +93,6 @@ static void forkmknod()
 	fsuid = atoi(advance_arg(true));
 	fsgid = atoi(advance_arg(true));
 
-	host_target_fd = open(dirname(target_host), O_PATH | O_RDONLY | O_CLOEXEC | O_DIRECTORY);
-	if (host_target_fd < 0) {
-		fprintf(stderr, "%d", ENOANO);
-		_exit(EXIT_FAILURE);
-	}
-
 	snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid);
 	mnt_fd = open(path, O_RDONLY | O_CLOEXEC);
 	if (mnt_fd < 0) {
@@ -173,28 +166,17 @@ static void forkmknod()
 		_exit(EXIT_FAILURE);
 	}
 
-	ret = fstat_fstatfs(target_dir_fd, &s2, &sfs2);
+	ret = fstatfs(target_dir_fd, &sfs);
 	if (ret) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
 
-	if (sfs2.f_flags & MS_NODEV) {
+	if (sfs.f_flags & MS_NODEV) {
 		fprintf(stderr, "%d", EPERM);
 		_exit(EXIT_FAILURE);
 	}
 
-	ret = fstat_fstatfs(host_target_fd, &s1, &sfs1);
-	if (ret) {
-		fprintf(stderr, "%d", ENOANO);
-		_exit(EXIT_FAILURE);
-	}
-
-	if (!same_fsinfo(&s1, &s2, &sfs1, &sfs2)) {
-		fprintf(stderr, "%d", ENOMEDIUM);
-		_exit(EXIT_FAILURE);
-	}
-
 	// basename() can modify its argument so accessing target_host is
 	// invalid from now on.
 	ret = mknodat(target_dir_fd, target, mode, dev);

From 5d457fb84179c9e290432ad918c9b0bbbb67aa21 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 22 Jul 2019 15:02:04 +0200
Subject: [PATCH 8/8] tests: s/which/command -v/g

On Disco shellcheck keeps yelling at me because we use which and not
command -v.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 test/includes/check.sh          |  2 +-
 test/includes/lxc.sh            |  2 +-
 test/includes/lxd.sh            |  2 +-
 test/includes/net.sh            |  2 +-
 test/includes/storage.sh        |  2 +-
 test/suites/deps.sh             |  2 +-
 test/suites/fuidshift.sh        |  2 +-
 test/suites/migration.sh        |  2 +-
 test/suites/static_analysis.sh  | 12 ++++++------
 test/suites/storage_profiles.sh |  4 ++--
 10 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/test/includes/check.sh b/test/includes/check.sh
index a0ff10abb3..7c41e2f7c4 100644
--- a/test/includes/check.sh
+++ b/test/includes/check.sh
@@ -6,7 +6,7 @@ check_dependencies() {
     missing=""
 
     for dep in "$@"; do
-        if ! which "$dep" >/dev/null 2>&1; then
+        if ! command -v "$dep" >/dev/null 2>&1; then
             [ "$missing" ] && missing="$missing $dep" || missing="$dep"
         fi
     done
diff --git a/test/includes/lxc.sh b/test/includes/lxc.sh
index 101b423ec9..92b786cd34 100644
--- a/test/includes/lxc.sh
+++ b/test/includes/lxc.sh
@@ -10,7 +10,7 @@ lxc_remote() {
     local injected cmd arg
 
     injected=0
-    cmd=$(which lxc)
+    cmd=$(command -v lxc)
 
     # shellcheck disable=SC2048,SC2068
     for arg in "$@"; do
diff --git a/test/includes/lxd.sh b/test/includes/lxd.sh
index 0e31fd0629..0355dfbf0c 100644
--- a/test/includes/lxd.sh
+++ b/test/includes/lxd.sh
@@ -284,7 +284,7 @@ wait_for() {
 }
 
 wipe() {
-    if which btrfs >/dev/null 2>&1; then
+    if command -v btrfs >/dev/null 2>&1; then
         rm -Rf "${1}" 2>/dev/null || true
         if [ -d "${1}" ]; then
             find "${1}" | tac | xargs btrfs subvolume delete >/dev/null 2>&1 || true
diff --git a/test/includes/net.sh b/test/includes/net.sh
index 1b55125232..2eeb63194c 100644
--- a/test/includes/net.sh
+++ b/test/includes/net.sh
@@ -2,7 +2,7 @@
 
 # Return an available random local port
 local_tcp_port() {
-    if which python3 >/dev/null 2>&1; then
+    if command -v python3 >/dev/null 2>&1; then
         (
             cat << EOF
 import socket
diff --git a/test/includes/storage.sh b/test/includes/storage.sh
index c45c87111a..2ca8ff06a1 100644
--- a/test/includes/storage.sh
+++ b/test/includes/storage.sh
@@ -32,7 +32,7 @@ available_storage_backends() {
     fi
 
     for backend in $storage_backends; do
-        if which "$backend" >/dev/null 2>&1; then
+        if command -v "$backend" >/dev/null 2>&1; then
             backends="$backends $backend"
         fi
     done
diff --git a/test/suites/deps.sh b/test/suites/deps.sh
index 69a1c879b0..e3f4ccc42a 100644
--- a/test/suites/deps.sh
+++ b/test/suites/deps.sh
@@ -1,3 +1,3 @@
 test_check_deps() {
-  ! ldd "$(which lxc)" | grep -q liblxc || false
+  ! ldd "$(command -v lxc)" | grep -q liblxc || false
 }
diff --git a/test/suites/fuidshift.sh b/test/suites/fuidshift.sh
index 5dd929548a..ff6d9c0b80 100644
--- a/test/suites/fuidshift.sh
+++ b/test/suites/fuidshift.sh
@@ -44,7 +44,7 @@ test_root_fuidshift() {
 }
 
 test_fuidshift() {
-  if ! which fuidshift >/dev/null 2>&1; then
+  if ! command -v fuidshift >/dev/null 2>&1; then
     echo "==> SKIP: No fuidshift binary could be found"
     return
   fi
diff --git a/test/suites/migration.sh b/test/suites/migration.sh
index 5dd0bcbacf..8bc5a6733c 100644
--- a/test/suites/migration.sh
+++ b/test/suites/migration.sh
@@ -361,7 +361,7 @@ migration() {
   lxc_remote project switch l1 default
   lxc_remote project delete l1:proj
 
-  if ! which criu >/dev/null 2>&1; then
+  if ! command -v criu >/dev/null 2>&1; then
     echo "==> SKIP: live migration with CRIU (missing binary)"
     return
   fi
diff --git a/test/suites/static_analysis.sh b/test/suites/static_analysis.sh
index 91faee47c5..49dbc7434a 100644
--- a/test/suites/static_analysis.sh
+++ b/test/suites/static_analysis.sh
@@ -13,14 +13,14 @@ test_static_analysis() {
 
     cd ../
     # Python3 static analysis
-    if which flake8 >/dev/null 2>&1; then
+    if command -v flake8 >/dev/null 2>&1; then
       flake8 test/deps/import-busybox
     else
       echo "flake8 not found, python static analysis disabled"
     fi
 
     # Shell static analysis
-    if which shellcheck >/dev/null 2>&1; then
+    if command -v shellcheck >/dev/null 2>&1; then
       shellcheck --shell sh test/*.sh test/includes/*.sh test/suites/*.sh test/backends/*.sh
     else
       echo "shellcheck not found, shell static analysis disabled"
@@ -53,12 +53,12 @@ test_static_analysis() {
     fi
 
     ## vet
-    if which vet >/dev/null 2>&1; then
+    if command -v vet >/dev/null 2>&1; then
       vet --all .
     fi
 
     ## golint
-    if which golint >/dev/null 2>&1; then
+    if command -v golint >/dev/null 2>&1; then
       golint -set_exit_status client/
 
       golint -set_exit_status fuidshift/
@@ -110,7 +110,7 @@ test_static_analysis() {
     fi
 
     ## deadcode
-    if which deadcode >/dev/null 2>&1; then
+    if command -v deadcode >/dev/null 2>&1; then
       OUT=$(deadcode ./fuidshift ./lxc ./lxd ./lxd/types ./shared ./shared/api ./shared/i18n ./shared/ioprogress ./shared/logging ./shared/osarch ./shared/simplestreams ./shared/termios ./shared/version ./lxd-benchmark 2>&1 | grep -v lxd/migrate.pb.go: | grep -v /C: | grep -vi _cgo | grep -vi _cfunc || true)
       if [ -n "${OUT}" ]; then
         echo "${OUT}" >&2
@@ -127,7 +127,7 @@ test_static_analysis() {
     fi
 
     ## misspell
-    if which misspell >/dev/null 2>&1; then
+    if command -v misspell >/dev/null 2>&1; then
       OUT=$(misspell ./ | grep -v po/ | grep -Ev "test/includes/lxd.sh.*monitord" | grep -Ev "test/suites/static_analysis.sh.*monitord" || true)
       if [ -n "${OUT}" ]; then
         echo "Found some typos"
diff --git a/test/suites/storage_profiles.sh b/test/suites/storage_profiles.sh
index b59e388e4e..a2fb19cee9 100644
--- a/test/suites/storage_profiles.sh
+++ b/test/suites/storage_profiles.sh
@@ -10,12 +10,12 @@ test_storage_profiles() {
     LXD_DIR="${LXD_STORAGE_DIR}"
 
     HAS_ZFS="dir"
-    if which zfs >/dev/null 2>&1; then
+    if command -v zfs >/dev/null 2>&1; then
       HAS_ZFS="zfs"
     fi
 
     HAS_BTRFS="dir"
-    if which btrfs >/dev/null 2>&1; then
+    if command -v btrfs >/dev/null 2>&1; then
       HAS_BTRFS="btrfs"
     fi
 


More information about the lxc-devel mailing list