[lxc-devel] [lxd/master] exec: switch to close_range()

brauner on Github lxc-bot at linuxcontainers.org
Sun Aug 9 10:26:14 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 505 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200809/0fe2c6a3/attachment.bin>
-------------- next part --------------
From 1082d798304f60582b436e61bfeeaba22af8ae90 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 9 Aug 2020 11:42:04 +0200
Subject: [PATCH 1/2] syscall_numbers: add close_range()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/include/syscall_numbers.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/lxd/include/syscall_numbers.h b/lxd/include/syscall_numbers.h
index ee6593ff5b..6c1f1abed1 100644
--- a/lxd/include/syscall_numbers.h
+++ b/lxd/include/syscall_numbers.h
@@ -129,4 +129,24 @@
 	#endif
 #endif
 
+#ifndef __NR_close_range
+	#if defined __alpha__
+		#define __NR_close_range 546
+	#elif defined _MIPS_SIM
+		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
+			#define __NR_close_range 4436
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
+			#define __NR_close_range 6436
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
+			#define __NR_close_range 5436
+		#endif
+	#elif defined __ia64__
+		#define __NR_close_range (436 + 1024)
+	#else
+		#define __NR_close_range 436
+	#endif
+#endif
+
 #endif /* __LXD_SYSCALL_NUMBERS_H */

From c1b9f734f0abc8315689234b4f6911fcd55a8dc3 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 9 Aug 2020 12:24:16 +0200
Subject: [PATCH 2/2] exec: switch to close_range() syscall

Starting with v5.9 we don't need the crazy proc-based loop anymore since we've
implemented the close_range() syscall: detect and use it!

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/daemon.go                  |  7 +++++++
 lxd/include/syscall_wrappers.h | 26 ++++++++++++++++++++++++++
 lxd/main_checkfeature.go       | 21 +++++++++++++++++++++
 lxd/main_forkexec.go           |  7 ++++++-
 lxd/sys/os.go                  |  1 +
 5 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 lxd/include/syscall_wrappers.h

diff --git a/lxd/daemon.go b/lxd/daemon.go
index 2d62378164..ff4307e974 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -639,6 +639,13 @@ func (d *Daemon) init() error {
 
 	// Look for kernel features
 	logger.Infof("Kernel features:")
+	d.os.CloseRange = canUseCloseRange()
+	if d.os.CloseRange {
+		logger.Infof(" - closing multiple file descriptors efficiently: yes")
+	} else {
+		logger.Infof(" - closing multiple file descriptors efficiently: no")
+	}
+
 	d.os.NetnsGetifaddrs = canUseNetnsGetifaddrs()
 	if d.os.NetnsGetifaddrs {
 		logger.Infof(" - netnsid-based network retrieval: yes")
diff --git a/lxd/include/syscall_wrappers.h b/lxd/include/syscall_wrappers.h
new file mode 100644
index 0000000000..96b900b392
--- /dev/null
+++ b/lxd/include/syscall_wrappers.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#ifndef __LXD_SYSCALL_WRAPPER_H
+#define __LXD_SYSCALL_WRAPPER_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#include <asm/unistd.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "syscall_numbers.h"
+
+#ifndef CLOSE_RANGE_UNSHARE
+#define CLOSE_RANGE_UNSHARE (1U << 1)
+#endif
+
+static inline int close_range(unsigned int fd, unsigned int max_fd, unsigned int flags)
+{
+	return syscall(__NR_close_range, fd, max_fd, flags);
+}
+
+#endif /* __LXD_SYSCALL_WRAPPER_H */
diff --git a/lxd/main_checkfeature.go b/lxd/main_checkfeature.go
index 77a7f53738..4cebf7a5d0 100644
--- a/lxd/main_checkfeature.go
+++ b/lxd/main_checkfeature.go
@@ -41,7 +41,9 @@ import (
 #include "include/memory_utils.h"
 #include "include/process_utils.h"
 #include "include/syscall_numbers.h"
+#include "include/syscall_wrappers.h"
 
+__ro_after_init bool close_range_aware = false;
 __ro_after_init bool tiocgptpeer_aware = false;
 __ro_after_init bool netnsid_aware = false;
 __ro_after_init bool pidfd_aware = false;
@@ -487,6 +489,20 @@ static void is_tiocgptpeer_aware(void)
 	tiocgptpeer_aware = true;
 }
 
+/* Probe for close_range(); a failed probe must close its /dev/null fd itself. */
+static void is_close_range_aware(void)
+{
+	int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+
+	if (fd < 0)
+		return;
+
+	if (close_range(fd, fd, CLOSE_RANGE_UNSHARE) == 0)
+		close_range_aware = true;	/* success also closed fd */
+	else
+		(void)close(fd);		/* probe failed: don't leak fd */
+}
+
 void checkfeature(void)
 {
 	__do_close int hostnetns_fd = -EBADF, newnetns_fd = -EBADF;
@@ -496,6 +512,7 @@ void checkfeature(void)
 	is_uevent_aware();
 	is_seccomp_notify_aware();
 	is_tiocgptpeer_aware();
+	is_close_range_aware();
 
 	if (setns(hostnetns_fd, CLONE_NEWNET) < 0)
 		(void)sprintf(errbuf, "%s", "Failed to attach to host network namespace");
@@ -577,3 +594,7 @@ func canUseShiftfs() bool {
 func canUseNativeTerminals() bool {
 	return bool(C.tiocgptpeer_aware)
 }
+
+func canUseCloseRange() bool {
+	return bool(C.close_range_aware)
+}
diff --git a/lxd/main_forkexec.go b/lxd/main_forkexec.go
index cbde207868..96a5fa4ede 100644
--- a/lxd/main_forkexec.go
+++ b/lxd/main_forkexec.go
@@ -27,6 +27,7 @@ import (
 
 #include "include/macro.h"
 #include "include/memory_utils.h"
+#include "include/syscall_wrappers.h"
 #include <lxc/attach_options.h>
 #include <lxc/lxccontainer.h>
 
@@ -289,7 +290,11 @@ __attribute__ ((noinline)) static int __forkexec(void)
 	if (!argvp || !*argvp)
 		return log_error(EXIT_FAILURE, "No command specified");
 
-	ret = close_inherited(fds_to_ignore, ARRAY_SIZE(fds_to_ignore));
+	ret = close_range(EXEC_PIPE_FD + 1, UINT_MAX, CLOSE_RANGE_UNSHARE);
+	if (ret) {
+		if (errno == ENOSYS)
+			ret = close_inherited(fds_to_ignore, ARRAY_SIZE(fds_to_ignore));
+	}
 	if (ret)
 		return log_error(EXIT_FAILURE, "Aborting attach to prevent leaking file descriptors into container");
 
diff --git a/lxd/sys/os.go b/lxd/sys/os.go
index b7faa7d4e3..e18d917ed1 100644
--- a/lxd/sys/os.go
+++ b/lxd/sys/os.go
@@ -63,6 +63,7 @@ type OS struct {
 	CGInfo cgroup.Info
 
 	// Kernel features
+	CloseRange              bool
 	NativeTerminals         bool
 	NetnsGetifaddrs         bool
 	PidFds                  bool


More information about the lxc-devel mailing list