[lxc-devel] [lxd/master] seccomp: switch from individual pread() to process_vm_readv()

brauner on Github lxc-bot at linuxcontainers.org
Tue Jul 28 13:27:01 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 411 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200728/1b1ca8ce/attachment.bin>
-------------- next part --------------
From ae1d7737d49b1169d54de72c7e63810a77c260af Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Tue, 28 Jul 2020 13:53:38 +0200
Subject: [PATCH] seccomp: switch from individual pread() to process_vm_readv()

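Read the string arguments of the intercepted mount() syscall (source, target,
filesystemtype and data) with a single process_vm_readv() call instead of one
pread() on the memory fd per argument. Each read is capped at the page size to
ensure that we don't cross a page boundary.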

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/seccomp/seccomp.go | 116 +++++++++++++++++++++++++----------------
 1 file changed, 70 insertions(+), 46 deletions(-)

diff --git a/lxd/seccomp/seccomp.go b/lxd/seccomp/seccomp.go
index 404b4998e5..c08c9ed133 100644
--- a/lxd/seccomp/seccomp.go
+++ b/lxd/seccomp/seccomp.go
@@ -1410,64 +1410,79 @@ func (s *Server) HandleMountSyscall(c Instance, siov *Iovec) int {
 		defer pidFd.Close()
 	}
 
-	// const char *source
-	args.source = ""
+	buf1 := [4096]C.char{}
+	buf2 := [4096]C.char{}
+	buf3 := [4096]C.char{}
+	buf4 := [4096]C.char{}
+
+	// process_vm_readv() doesn't like crossing page boundaries when
+	// reading individual syscall args.
+	bufSize := uint64(4096)
+	if bufSize > pageSize {
+		bufSize = pageSize
+	}
+
+	mntSource := buf1[:bufSize]
+	mntTarget := buf2[:bufSize]
+	mntFs := buf3[:bufSize]
+	mntData := buf4[:bufSize]
+
+	localIov := []unix.Iovec{
+		unix.Iovec{Base: (*byte)(unsafe.Pointer(&mntSource[0]))},
+		unix.Iovec{Base: (*byte)(unsafe.Pointer(&mntTarget[0]))},
+		unix.Iovec{Base: (*byte)(unsafe.Pointer(&mntFs[0]))},
+		unix.Iovec{Base: (*byte)(unsafe.Pointer(&mntData[0]))},
+	}
+
+	remoteIov := []unix.RemoteIovec{
+		unix.RemoteIovec{Base: uintptr(siov.req.data.args[0])},
+		unix.RemoteIovec{Base: uintptr(siov.req.data.args[1])},
+		unix.RemoteIovec{Base: uintptr(siov.req.data.args[2])},
+		unix.RemoteIovec{Base: uintptr(siov.req.data.args[4])},
+	}
+
 	if siov.req.data.args[0] != 0 {
-		cBuf := [unix.PathMax]C.char{}
-		_, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&cBuf[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[0]))
-		if err != nil {
-			ctx["err"] = fmt.Sprintf("Failed to read memory for first argument of mount syscall: %s", err)
-			ctx["syscall_continue"] = "true"
-			C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
-			return 0
-		}
-		args.source = C.GoString(&cBuf[0])
+		localIov[0].Len = bufSize
+		remoteIov[0].Len = int(bufSize)
 	}
 
-	// const char *target
-	args.target = ""
 	if siov.req.data.args[1] != 0 {
-		cBuf := [unix.PathMax]C.char{}
-		_, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&cBuf[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[1]))
-		if err != nil {
-			ctx["err"] = fmt.Sprintf("Failed to read memory for second argument of mount syscall: %s", err)
-			ctx["syscall_continue"] = "true"
-			C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
-			return 0
-		}
-		args.target = C.GoString(&cBuf[0])
+		localIov[1].Len = bufSize
+		remoteIov[1].Len = int(bufSize)
 	}
 
-	// const char *filesystemtype
-	args.fstype = ""
 	if siov.req.data.args[2] != 0 {
-		cBuf := [unix.PathMax]C.char{}
-		_, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&cBuf[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[2]))
-		if err != nil {
-			ctx["err"] = fmt.Sprintf("Failed to read memory for third argument of mount syscall: %s", err)
-			ctx["syscall_continue"] = "true"
-			C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
-			return 0
-		}
-		args.fstype = C.GoString(&cBuf[0])
+		localIov[2].Len = bufSize
+		remoteIov[2].Len = int(bufSize)
+	}
+
+	if siov.req.data.args[4] != 0 {
+		localIov[3].Len = bufSize
+		remoteIov[3].Len = int(bufSize)
 	}
 
+	_, err := unix.ProcessVMReadv(args.pid, localIov, remoteIov, 0)
+	if err != nil {
+		ctx["err"] = fmt.Sprintf("Failed to read process memory of mount syscall: %s", err)
+		ctx["syscall_continue"] = "true"
+		C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
+		return 0
+	}
+
+	// const char *source
+	args.source = C.GoString(&mntSource[0])
+	ctx["source"] = args.source
+	// const char *target
+	args.target = C.GoString(&mntTarget[0])
+	ctx["target"] = args.target
+	// const char *filesystemtype
+	args.fstype = C.GoString(&mntFs[0])
+	ctx["fstype"] = args.fstype
 	// unsigned long mountflags
 	args.flags = int(siov.req.data.args[3])
-
 	// const void *data
-	args.data = ""
-	if siov.req.data.args[4] != 0 {
-		cBuf := [unix.PathMax]C.char{}
-		_, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&cBuf[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[4]))
-		if err != nil {
-			ctx["err"] = fmt.Sprintf("Failed to read memory for fifth argument of mount syscall: %s", err)
-			ctx["syscall_continue"] = "true"
-			C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue))
-			return 0
-		}
-		args.data = C.GoString(&cBuf[0])
-	}
+	args.data = C.GoString(&mntData[0])
+	ctx["data"] = args.data
 
 	ok, fuseBinary := s.MountSyscallValid(c, &args)
 	if !ok {
@@ -1745,3 +1760,12 @@ func (s *Server) MountSyscallShift(c Instance) bool {
 
 	return false
 }
+
+var pageSize uint64 = 4096 // default of 4096; overwritten by init() below when unix.Getpagesize() is positive
+
+func init() { // records the page size that HandleMountSyscall uses as the upper bound for bufSize
+	tmp := unix.Getpagesize()
+	if tmp > 0 { // keep the 4096 default unless a positive page size is reported
+		pageSize = uint64(tmp)
+	}
+}


More information about the lxc-devel mailing list