[lxc-devel] [lxd/master] seccomp: fix mount emulation

brauner on Github lxc-bot at linuxcontainers.org
Fri Oct 16 10:18:31 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20201016/759e3c7b/attachment.bin>
-------------- next part --------------
From b76ffcd374cf5577892fff39a13fa56e33ee117f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 16 Oct 2020 10:21:55 +0200
Subject: [PATCH 1/3] seccomp: switch back to pread()

process_vm_readv() can read memory from another task if it goes away.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/seccomp/seccomp.go | 112 ++++++++++++++++++-----------------------
 1 file changed, 49 insertions(+), 63 deletions(-)

diff --git a/lxd/seccomp/seccomp.go b/lxd/seccomp/seccomp.go
index 7a4553dee2..3959fab4b4 100644
--- a/lxd/seccomp/seccomp.go
+++ b/lxd/seccomp/seccomp.go
@@ -1620,66 +1620,67 @@ func (s *Server) HandleMountSyscall(c Instance, siov *Iovec) int {
 		defer pidFd.Close()
 	}
 
-	buf1 := [4096]C.char{}
-	buf2 := [4096]C.char{}
-	buf3 := [4096]C.char{}
-	buf4 := [4096]C.char{}
-
-	// process_vm_readv() doesn't like crossing page boundaries when
-	// reading individual syscall args.
-	bufSize := 4096
-	if bufSize > pageSize {
-		bufSize = pageSize
-	}
-
-	mntSource := buf1[:bufSize]
-	mntTarget := buf2[:bufSize]
-	mntFs := buf3[:bufSize]
-	mntData := buf4[:bufSize]
-
-	localIov := []unix.Iovec{
-		{Base: (*byte)(unsafe.Pointer(&mntSource[0]))},
-		{Base: (*byte)(unsafe.Pointer(&mntTarget[0]))},
-		{Base: (*byte)(unsafe.Pointer(&mntFs[0]))},
-		{Base: (*byte)(unsafe.Pointer(&mntData[0]))},
-	}
-
-	remoteIov := []unix.RemoteIovec{
-		{Base: uintptr(siov.req.data.args[0])},
-		{Base: uintptr(siov.req.data.args[1])},
-		{Base: uintptr(siov.req.data.args[2])},
-		{Base: uintptr(siov.req.data.args[4])},
-	}
+	mntSource := [unix.PathMax]C.char{}
+	mntTarget := [unix.PathMax]C.char{}
+	mntFs := [unix.PathMax]C.char{}
+	mntData := [unix.PathMax]C.char{}
 
+	// const char *source
 	if siov.req.data.args[0] != 0 {
-		localIov[0].SetLen(bufSize)
-		remoteIov[0].Len = bufSize
+		_, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntSource[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[0]))
+		if err != nil {
+			ctx["err"] = fmt.Sprintf("Failed to read source path for of mount syscall: %s", err)
+			ctx["syscall_continue"] = "true"
+			C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+			return 0
+		}
 	}
+	args.source = C.GoString(&mntSource[0])
+	ctx["source"] = args.source
 
+	// const char *target
 	if siov.req.data.args[1] != 0 {
-		localIov[1].SetLen(bufSize)
-		remoteIov[1].Len = bufSize
+		_, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntTarget[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[1]))
+		if err != nil {
+			ctx["err"] = fmt.Sprintf("Failed to read target path for of mount syscall: %s", err)
+			ctx["syscall_continue"] = "true"
+			C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+			return 0
+		}
 	}
+	args.target = C.GoString(&mntTarget[0])
+	ctx["target"] = args.target
 
-	if siov.req.data.args[2] != 0 {
-		localIov[2].SetLen(bufSize)
-		remoteIov[2].Len = bufSize
+	// const char *filesystemtype
+	if siov.req.data.args[1] != 0 {
+		_, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntFs[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[2]))
+		if err != nil {
+			ctx["err"] = fmt.Sprintf("Failed to read fstype for of mount syscall: %s", err)
+			ctx["syscall_continue"] = "true"
+			C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+			return 0
+		}
 	}
+	args.fstype = C.GoString(&mntFs[0])
+	ctx["fstype"] = args.fstype
 
-	if siov.req.data.args[4] != 0 {
-		localIov[3].SetLen(bufSize)
-		remoteIov[3].Len = bufSize
-	}
+	// unsigned long mountflags
+	args.flags = int(siov.req.data.args[3])
 
-	_, err := unix.ProcessVMReadv(args.pid, localIov, remoteIov, 0)
-	if err != nil {
-		ctx["err"] = fmt.Sprintf("Failed to read process memory of mount syscall: %s", err)
-		ctx["syscall_continue"] = "true"
-		C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
-		return 0
+	// const void *data
+	if siov.req.data.args[4] != 0 {
+		_, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntData[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[4]))
+		if err != nil {
+			ctx["err"] = fmt.Sprintf("Failed to read mount data for of mount syscall: %s", err)
+			ctx["syscall_continue"] = "true"
+			C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+			return 0
+		}
 	}
+	args.data = C.GoString(&mntData[0])
+	ctx["data"] = args.data
 
-	err = shared.PidfdSendSignal(int(pidFd.Fd()), 0, 0)
+	err := shared.PidfdSendSignal(int(pidFd.Fd()), 0, 0)
 	if err != nil {
 		ctx["err"] = fmt.Sprintf("Failed to send signal to target process for of mount syscall: %s", err)
 		ctx["syscall_continue"] = "true"
@@ -1687,21 +1688,6 @@ func (s *Server) HandleMountSyscall(c Instance, siov *Iovec) int {
 		return 0
 	}
 
-	// const char *source
-	args.source = C.GoString(&mntSource[0])
-	ctx["source"] = args.source
-	// const char *target
-	args.target = C.GoString(&mntTarget[0])
-	ctx["target"] = args.target
-	// const char *filesystemtype
-	args.fstype = C.GoString(&mntFs[0])
-	ctx["fstype"] = args.fstype
-	// unsigned long mountflags
-	args.flags = int(siov.req.data.args[3])
-	// const void *data
-	args.data = C.GoString(&mntData[0])
-	ctx["data"] = args.data
-
 	ok, fuseBinary := s.MountSyscallValid(c, &args)
 	if !ok {
 		ctx["syscall_continue"] = "true"

From 1dd4a0b1705fcf55f581ff0dcfceb5ec24979098 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 16 Oct 2020 10:54:26 +0200
Subject: [PATCH 2/3] nsexec: simplify userns attach

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_nsexec.go | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/lxd/main_nsexec.go b/lxd/main_nsexec.go
index 3918547b57..3bfe019832 100644
--- a/lxd/main_nsexec.go
+++ b/lxd/main_nsexec.go
@@ -142,7 +142,7 @@ int preserve_ns(pid_t pid, int ns_fd, const char *ns)
 // If the two processes are not in the same namespace returns an fd to the
 // namespace of the second process identified by @pid2. If the two processes are
 // in the same namespace returns -EINVAL, -1 if an error occurred.
-static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char *ns)
+static int in_same_namespace(pid_t pid1, int ns_fd_pid2, const char *ns)
 {
 	__do_close int ns_fd1 = -EBADF, ns_fd2 = -EBADF;
 	int ret = -1;
@@ -158,7 +158,7 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char
 		return -1;
 	}
 
-	ns_fd2 = preserve_ns(pid2, ns_fd_pid2, ns);
+	ns_fd2 = preserve_ns(-ESRCH, ns_fd_pid2, ns);
 	if (ns_fd2 < 0)
 		return -1;
 
@@ -178,12 +178,12 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char
 	return move_fd(ns_fd2);
 }
 
-static void __attach_userns(int pid, int ns_fd)
+void attach_userns_fd(int ns_fd)
 {
 	__do_close int userns_fd = -EBADF;
 	int ret;
 
-	userns_fd = in_same_namespace(getpid(), pid, ns_fd, "user");
+	userns_fd = in_same_namespace(getpid(), ns_fd, "user");
 	if (userns_fd < 0) {
 		if (userns_fd == -EINVAL)
 			return;
@@ -216,16 +216,6 @@ static void __attach_userns(int pid, int ns_fd)
 	}
 }
 
-void attach_userns(int pid)
-{
-	return __attach_userns(pid, -EBADF);
-}
-
-void attach_userns_fd(int ns_fd)
-{
-	return __attach_userns(-1, ns_fd);
-}
-
 int pidfd_nsfd(int pidfd, pid_t pid)
 {
 	__do_close int ns_fd = -EBADF;

From d57813b900c4a9ae3b55b22fb87baf18e1780f2b Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 16 Oct 2020 12:17:16 +0200
Subject: [PATCH 3/3] forksyscall: preserve root and cwd fds for shifted mount
 emulation

Otherwise we won't be able to reacquire the root and current working directory
of the target task.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/main_forksyscall.go | 43 +++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index a5b9c565da..be2839f2a0 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -57,7 +57,8 @@ static bool chdirchroot_in_mntns(int cwd_fd, int root_fd)
 	return true;
 }
 
-static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd)
+static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd,
+				int *rootfd, int *cwdfd)
 {
 	__do_close int cwd_fd = -EBADF, root_fd = -EBADF;
 	char buf[256];
@@ -75,7 +76,27 @@ static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd)
 	if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS))
 		return false;
 
-	return chdirchroot_in_mntns(cwd_fd, root_fd);
+	if (!chdirchroot_in_mntns(cwd_fd, root_fd))
+		return false;
+
+	if (rootfd)
+		*rootfd = move_fd(root_fd);
+
+	if (cwdfd)
+		*cwdfd = move_fd(cwd_fd);
+	return true;
+}
+
+static bool reacquire_basic_creds(int pidfd, int ns_fd,
+				  int root_fd, int cwd_fd)
+{
+	if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS))
+		return false;
+
+	if (!chdirchroot_in_mntns(cwd_fd, root_fd))
+		return false;
+
+	return true;
 }
 
 static bool acquire_final_creds(pid_t pid, uid_t uid, gid_t gid, uid_t fsuid, gid_t fsgid)
@@ -157,7 +178,7 @@ static void mknod_emulate(void)
 	fsuid = atoi(advance_arg(true));
 	fsgid = atoi(advance_arg(true));
 
-	if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+	if (!acquire_basic_creds(pid, pidfd, ns_fd, NULL, NULL)) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
@@ -257,7 +278,7 @@ static void setxattr_emulate(void)
 	size = atoi(advance_arg(true));
 	data = advance_arg(true);
 
-	if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+	if (!acquire_basic_creds(pid, pidfd, ns_fd, NULL, NULL)) {
 		fprintf(stderr, "%d", ENOANO);
 		_exit(EXIT_FAILURE);
 	}
@@ -336,7 +357,8 @@ static int make_tmpfile(char *template, bool dir)
 
 static void mount_emulate(void)
 {
-	__do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF;
+	__do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF,
+		   root_fd = -EBADF, cwd_fd = -EBADF;
 	char *source = NULL, *shiftfs = NULL, *target = NULL, *fstype = NULL;
 	bool use_fuse;
 	uid_t nsuid = -1, uid = -1, nsfsuid = -1, fsuid = -1;
@@ -384,7 +406,7 @@ static void mount_emulate(void)
 		change_namespaces(pidfd, ns_fd, CLONE_NEWPID);
 	}
 
-	if (!acquire_basic_creds(pid, pidfd, ns_fd))
+	if (!acquire_basic_creds(pid, pidfd, ns_fd, &root_fd, &cwd_fd))
 		_exit(EXIT_FAILURE);
 
 	if (!acquire_final_creds(pid, uid, gid, fsuid, fsgid))
@@ -441,8 +463,13 @@ static void mount_emulate(void)
 			_exit(EXIT_FAILURE);
 		}
 
-		attach_userns_fd(ns_fd);
-		if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+		if (!change_namespaces(pidfd, ns_fd, CLONE_NEWUSER)) {
+			umount2(target, MNT_DETACH);
+			umount2(target, MNT_DETACH);
+			_exit(EXIT_FAILURE);
+		}
+
+		if (!reacquire_basic_creds(pidfd, ns_fd, root_fd, cwd_fd)) {
 			umount2(target, MNT_DETACH);
 			umount2(target, MNT_DETACH);
 			_exit(EXIT_FAILURE);


More information about the lxc-devel mailing list