[lxc-devel] [lxd/master] seccomp: fix mount emulation
brauner on Github
lxc-bot at linuxcontainers.org
Fri Oct 16 10:18:31 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20201016/759e3c7b/attachment.bin>
-------------- next part --------------
From b76ffcd374cf5577892fff39a13fa56e33ee117f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 16 Oct 2020 10:21:55 +0200
Subject: [PATCH 1/3] seccomp: switch back to pread()
process_vm_readv() can read memory from another task if it goes away.
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
lxd/seccomp/seccomp.go | 112 ++++++++++++++++++-----------------------
1 file changed, 49 insertions(+), 63 deletions(-)
diff --git a/lxd/seccomp/seccomp.go b/lxd/seccomp/seccomp.go
index 7a4553dee2..3959fab4b4 100644
--- a/lxd/seccomp/seccomp.go
+++ b/lxd/seccomp/seccomp.go
@@ -1620,66 +1620,67 @@ func (s *Server) HandleMountSyscall(c Instance, siov *Iovec) int {
defer pidFd.Close()
}
- buf1 := [4096]C.char{}
- buf2 := [4096]C.char{}
- buf3 := [4096]C.char{}
- buf4 := [4096]C.char{}
-
- // process_vm_readv() doesn't like crossing page boundaries when
- // reading individual syscall args.
- bufSize := 4096
- if bufSize > pageSize {
- bufSize = pageSize
- }
-
- mntSource := buf1[:bufSize]
- mntTarget := buf2[:bufSize]
- mntFs := buf3[:bufSize]
- mntData := buf4[:bufSize]
-
- localIov := []unix.Iovec{
- {Base: (*byte)(unsafe.Pointer(&mntSource[0]))},
- {Base: (*byte)(unsafe.Pointer(&mntTarget[0]))},
- {Base: (*byte)(unsafe.Pointer(&mntFs[0]))},
- {Base: (*byte)(unsafe.Pointer(&mntData[0]))},
- }
-
- remoteIov := []unix.RemoteIovec{
- {Base: uintptr(siov.req.data.args[0])},
- {Base: uintptr(siov.req.data.args[1])},
- {Base: uintptr(siov.req.data.args[2])},
- {Base: uintptr(siov.req.data.args[4])},
- }
+ mntSource := [unix.PathMax]C.char{}
+ mntTarget := [unix.PathMax]C.char{}
+ mntFs := [unix.PathMax]C.char{}
+ mntData := [unix.PathMax]C.char{}
+ // const char *source
if siov.req.data.args[0] != 0 {
- localIov[0].SetLen(bufSize)
- remoteIov[0].Len = bufSize
+ _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntSource[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[0]))
+ if err != nil {
+ ctx["err"] = fmt.Sprintf("Failed to read source path for of mount syscall: %s", err)
+ ctx["syscall_continue"] = "true"
+ C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+ return 0
+ }
}
+ args.source = C.GoString(&mntSource[0])
+ ctx["source"] = args.source
+ // const char *target
if siov.req.data.args[1] != 0 {
- localIov[1].SetLen(bufSize)
- remoteIov[1].Len = bufSize
+ _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntTarget[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[1]))
+ if err != nil {
+ ctx["err"] = fmt.Sprintf("Failed to read target path for of mount syscall: %s", err)
+ ctx["syscall_continue"] = "true"
+ C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+ return 0
+ }
}
+ args.target = C.GoString(&mntTarget[0])
+ ctx["target"] = args.target
- if siov.req.data.args[2] != 0 {
- localIov[2].SetLen(bufSize)
- remoteIov[2].Len = bufSize
+ // const char *filesystemtype
+ if siov.req.data.args[1] != 0 {
+ _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntFs[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[2]))
+ if err != nil {
+ ctx["err"] = fmt.Sprintf("Failed to read fstype for of mount syscall: %s", err)
+ ctx["syscall_continue"] = "true"
+ C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+ return 0
+ }
}
+ args.fstype = C.GoString(&mntFs[0])
+ ctx["fstype"] = args.fstype
- if siov.req.data.args[4] != 0 {
- localIov[3].SetLen(bufSize)
- remoteIov[3].Len = bufSize
- }
+ // unsigned long mountflags
+ args.flags = int(siov.req.data.args[3])
- _, err := unix.ProcessVMReadv(args.pid, localIov, remoteIov, 0)
- if err != nil {
- ctx["err"] = fmt.Sprintf("Failed to read process memory of mount syscall: %s", err)
- ctx["syscall_continue"] = "true"
- C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
- return 0
+ // const void *data
+ if siov.req.data.args[4] != 0 {
+ _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntData[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[4]))
+ if err != nil {
+ ctx["err"] = fmt.Sprintf("Failed to read mount data for of mount syscall: %s", err)
+ ctx["syscall_continue"] = "true"
+ C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+ return 0
+ }
}
+ args.data = C.GoString(&mntData[0])
+ ctx["data"] = args.data
- err = shared.PidfdSendSignal(int(pidFd.Fd()), 0, 0)
+ err := shared.PidfdSendSignal(int(pidFd.Fd()), 0, 0)
if err != nil {
ctx["err"] = fmt.Sprintf("Failed to send signal to target process for of mount syscall: %s", err)
ctx["syscall_continue"] = "true"
@@ -1687,21 +1688,6 @@ func (s *Server) HandleMountSyscall(c Instance, siov *Iovec) int {
return 0
}
- // const char *source
- args.source = C.GoString(&mntSource[0])
- ctx["source"] = args.source
- // const char *target
- args.target = C.GoString(&mntTarget[0])
- ctx["target"] = args.target
- // const char *filesystemtype
- args.fstype = C.GoString(&mntFs[0])
- ctx["fstype"] = args.fstype
- // unsigned long mountflags
- args.flags = int(siov.req.data.args[3])
- // const void *data
- args.data = C.GoString(&mntData[0])
- ctx["data"] = args.data
-
ok, fuseBinary := s.MountSyscallValid(c, &args)
if !ok {
ctx["syscall_continue"] = "true"
From 1dd4a0b1705fcf55f581ff0dcfceb5ec24979098 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 16 Oct 2020 10:54:26 +0200
Subject: [PATCH 2/3] nsexec: simplify userns attach
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
lxd/main_nsexec.go | 18 ++++--------------
1 file changed, 4 insertions(+), 14 deletions(-)
diff --git a/lxd/main_nsexec.go b/lxd/main_nsexec.go
index 3918547b57..3bfe019832 100644
--- a/lxd/main_nsexec.go
+++ b/lxd/main_nsexec.go
@@ -142,7 +142,7 @@ int preserve_ns(pid_t pid, int ns_fd, const char *ns)
// If the two processes are not in the same namespace returns an fd to the
// namespace of the second process identified by @pid2. If the two processes are
// in the same namespace returns -EINVAL, -1 if an error occurred.
-static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char *ns)
+static int in_same_namespace(pid_t pid1, int ns_fd_pid2, const char *ns)
{
__do_close int ns_fd1 = -EBADF, ns_fd2 = -EBADF;
int ret = -1;
@@ -158,7 +158,7 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char
return -1;
}
- ns_fd2 = preserve_ns(pid2, ns_fd_pid2, ns);
+ ns_fd2 = preserve_ns(-ESRCH, ns_fd_pid2, ns);
if (ns_fd2 < 0)
return -1;
@@ -178,12 +178,12 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char
return move_fd(ns_fd2);
}
-static void __attach_userns(int pid, int ns_fd)
+void attach_userns_fd(int ns_fd)
{
__do_close int userns_fd = -EBADF;
int ret;
- userns_fd = in_same_namespace(getpid(), pid, ns_fd, "user");
+ userns_fd = in_same_namespace(getpid(), ns_fd, "user");
if (userns_fd < 0) {
if (userns_fd == -EINVAL)
return;
@@ -216,16 +216,6 @@ static void __attach_userns(int pid, int ns_fd)
}
}
-void attach_userns(int pid)
-{
- return __attach_userns(pid, -EBADF);
-}
-
-void attach_userns_fd(int ns_fd)
-{
- return __attach_userns(-1, ns_fd);
-}
-
int pidfd_nsfd(int pidfd, pid_t pid)
{
__do_close int ns_fd = -EBADF;
From d57813b900c4a9ae3b55b22fb87baf18e1780f2b Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 16 Oct 2020 12:17:16 +0200
Subject: [PATCH 3/3] forksyscall: preserve root and cwd fds for shifted mount
emulation
Otherwise we won't be able to reacquire the root and current working directory
of the target task.
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
lxd/main_forksyscall.go | 43 +++++++++++++++++++++++++++++++++--------
1 file changed, 35 insertions(+), 8 deletions(-)
diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index a5b9c565da..be2839f2a0 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -57,7 +57,8 @@ static bool chdirchroot_in_mntns(int cwd_fd, int root_fd)
return true;
}
-static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd)
+static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd,
+ int *rootfd, int *cwdfd)
{
__do_close int cwd_fd = -EBADF, root_fd = -EBADF;
char buf[256];
@@ -75,7 +76,27 @@ static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd)
if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS))
return false;
- return chdirchroot_in_mntns(cwd_fd, root_fd);
+ if (!chdirchroot_in_mntns(cwd_fd, root_fd))
+ return false;
+
+ if (rootfd)
+ *rootfd = move_fd(root_fd);
+
+ if (cwdfd)
+ *cwdfd = move_fd(cwd_fd);
+ return true;
+}
+
+static bool reacquire_basic_creds(int pidfd, int ns_fd,
+ int root_fd, int cwd_fd)
+{
+ if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS))
+ return false;
+
+ if (!chdirchroot_in_mntns(cwd_fd, root_fd))
+ return false;
+
+ return true;
}
static bool acquire_final_creds(pid_t pid, uid_t uid, gid_t gid, uid_t fsuid, gid_t fsgid)
@@ -157,7 +178,7 @@ static void mknod_emulate(void)
fsuid = atoi(advance_arg(true));
fsgid = atoi(advance_arg(true));
- if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+ if (!acquire_basic_creds(pid, pidfd, ns_fd, NULL, NULL)) {
fprintf(stderr, "%d", ENOANO);
_exit(EXIT_FAILURE);
}
@@ -257,7 +278,7 @@ static void setxattr_emulate(void)
size = atoi(advance_arg(true));
data = advance_arg(true);
- if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+ if (!acquire_basic_creds(pid, pidfd, ns_fd, NULL, NULL)) {
fprintf(stderr, "%d", ENOANO);
_exit(EXIT_FAILURE);
}
@@ -336,7 +357,8 @@ static int make_tmpfile(char *template, bool dir)
static void mount_emulate(void)
{
- __do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF;
+ __do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF,
+ root_fd = -EBADF, cwd_fd = -EBADF;
char *source = NULL, *shiftfs = NULL, *target = NULL, *fstype = NULL;
bool use_fuse;
uid_t nsuid = -1, uid = -1, nsfsuid = -1, fsuid = -1;
@@ -384,7 +406,7 @@ static void mount_emulate(void)
change_namespaces(pidfd, ns_fd, CLONE_NEWPID);
}
- if (!acquire_basic_creds(pid, pidfd, ns_fd))
+ if (!acquire_basic_creds(pid, pidfd, ns_fd, &root_fd, &cwd_fd))
_exit(EXIT_FAILURE);
if (!acquire_final_creds(pid, uid, gid, fsuid, fsgid))
@@ -441,8 +463,13 @@ static void mount_emulate(void)
_exit(EXIT_FAILURE);
}
- attach_userns_fd(ns_fd);
- if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+ if (!change_namespaces(pidfd, ns_fd, CLONE_NEWUSER)) {
+ umount2(target, MNT_DETACH);
+ umount2(target, MNT_DETACH);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!reacquire_basic_creds(pidfd, ns_fd, root_fd, cwd_fd)) {
umount2(target, MNT_DETACH);
umount2(target, MNT_DETACH);
_exit(EXIT_FAILURE);
More information about the lxc-devel
mailing list