[lxc-devel] [lxd/master] exec: switch to close_range()
brauner on Github
lxc-bot at linuxcontainers.org
Sun Aug 9 10:26:14 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 505 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200809/0fe2c6a3/attachment.bin>
-------------- next part --------------
From 1082d798304f60582b436e61bfeeaba22af8ae90 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 9 Aug 2020 11:42:04 +0200
Subject: [PATCH 1/2] syscall_numbers: add close_range()
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
lxd/include/syscall_numbers.h | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/lxd/include/syscall_numbers.h b/lxd/include/syscall_numbers.h
index ee6593ff5b..6c1f1abed1 100644
--- a/lxd/include/syscall_numbers.h
+++ b/lxd/include/syscall_numbers.h
@@ -129,4 +129,24 @@
#endif
#endif
+#ifndef __NR_close_range /* fallback: only used when no definition is already in scope */
+ #if defined __alpha__
+ #define __NR_close_range 546
+ #elif defined _MIPS_SIM /* MIPS: one value per ABI; NOTE(review): nothing is defined if no ABI below matches */
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_close_range 4436
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_close_range 6436
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_close_range 5436
+ #endif
+ #elif defined __ia64__
+ #define __NR_close_range (436 + 1024) /* the default number below, offset by 1024 */
+ #else /* every architecture not special-cased above */
+ #define __NR_close_range 436
+ #endif
+#endif /* __NR_close_range */
+
#endif /* __LXD_SYSCALL_NUMBERS_H */
From c1b9f734f0abc8315689234b4f6911fcd55a8dc3 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 9 Aug 2020 12:24:16 +0200
Subject: [PATCH 2/2] exec: switch to close_range() syscall
Starting with v5.9 we don't need the crazy proc-based loop anymore since we've
implemented the close_range() syscall: detect and use it!
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
lxd/daemon.go | 7 +++++++
lxd/include/syscall_wrappers.h | 26 ++++++++++++++++++++++++++
lxd/main_checkfeature.go | 21 +++++++++++++++++++++
lxd/main_forkexec.go | 7 ++++++-
lxd/sys/os.go | 1 +
5 files changed, 61 insertions(+), 1 deletion(-)
create mode 100644 lxd/include/syscall_wrappers.h
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 2d62378164..ff4307e974 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -639,6 +639,13 @@ func (d *Daemon) init() error {
// Look for kernel features
logger.Infof("Kernel features:")
+	// Record whether the close_range() feature probe succeeded and report it
+	// alongside the other kernel features.
+	d.os.CloseRange = canUseCloseRange()
+	if d.os.CloseRange {
+		logger.Infof(" - closing multiple file descriptors efficiently: yes")
+	} else {
+		logger.Infof(" - closing multiple file descriptors efficiently: no")
+	}
+
d.os.NetnsGetifaddrs = canUseNetnsGetifaddrs()
if d.os.NetnsGetifaddrs {
logger.Infof(" - netnsid-based network retrieval: yes")
diff --git a/lxd/include/syscall_wrappers.h b/lxd/include/syscall_wrappers.h
new file mode 100644
index 0000000000..96b900b392
--- /dev/null
+++ b/lxd/include/syscall_wrappers.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#ifndef __LXD_SYSCALL_WRAPPER_H
+#define __LXD_SYSCALL_WRAPPER_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#include <asm/unistd.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "syscall_numbers.h"
+
+#ifndef CLOSE_RANGE_UNSHARE
+#define CLOSE_RANGE_UNSHARE (1U << 1)
+#endif
+
+/*
+ * Thin wrapper that issues the raw close_range system call via syscall().
+ *
+ * fd and max_fd are the first and last descriptor of the range handed to the
+ * kernel; flags is passed through unchanged (e.g. CLOSE_RANGE_UNSHARE).
+ * Returns whatever syscall() returns, narrowed to int.
+ */
+static inline int close_range(unsigned int fd, unsigned int max_fd, unsigned int flags)
+{
+	long rc;
+
+	rc = syscall(__NR_close_range, fd, max_fd, flags);
+	return rc;
+}
+
+#endif /* __LXD_SYSCALL_WRAPPER_H */
diff --git a/lxd/main_checkfeature.go b/lxd/main_checkfeature.go
index 77a7f53738..4cebf7a5d0 100644
--- a/lxd/main_checkfeature.go
+++ b/lxd/main_checkfeature.go
@@ -41,7 +41,9 @@ import (
#include "include/memory_utils.h"
#include "include/process_utils.h"
#include "include/syscall_numbers.h"
+#include "include/syscall_wrappers.h"
+__ro_after_init bool close_range_aware = false;
__ro_after_init bool tiocgptpeer_aware = false;
__ro_after_init bool netnsid_aware = false;
__ro_after_init bool pidfd_aware = false;
@@ -487,6 +489,20 @@ static void is_tiocgptpeer_aware(void)
tiocgptpeer_aware = true;
}
+/*
+ * Probe whether close_range() with CLOSE_RANGE_UNSHARE works on the running
+ * kernel and, if so, set close_range_aware to true.
+ *
+ * A throwaway descriptor on /dev/null is opened and passed to close_range()
+ * as a one-element range. When the call succeeds the kernel has closed the
+ * descriptor. When it fails the descriptor is still open and is closed here
+ * so the probe does not leak it. close_range_aware is left untouched if the
+ * probe cannot be run or fails.
+ */
+static void is_close_range_aware(void)
+{
+	int fd;
+
+	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return;
+
+	if (close_range(fd, fd, CLOSE_RANGE_UNSHARE)) {
+		/* The syscall did not close the probe fd for us; don't leak it. */
+		close(fd);
+		return;
+	}
+
+	close_range_aware = true;
+}
+
void checkfeature(void)
{
__do_close int hostnetns_fd = -EBADF, newnetns_fd = -EBADF;
@@ -496,6 +512,7 @@ void checkfeature(void)
is_uevent_aware();
is_seccomp_notify_aware();
is_tiocgptpeer_aware();
+ is_close_range_aware();
if (setns(hostnetns_fd, CLONE_NEWNET) < 0)
(void)sprintf(errbuf, "%s", "Failed to attach to host network namespace");
@@ -577,3 +594,7 @@ func canUseShiftfs() bool {
func canUseNativeTerminals() bool {
return bool(C.tiocgptpeer_aware)
}
+
+// canUseCloseRange reports the value of the C-side close_range_aware flag.
+func canUseCloseRange() bool {
+	supported := bool(C.close_range_aware)
+
+	return supported
+}
diff --git a/lxd/main_forkexec.go b/lxd/main_forkexec.go
index cbde207868..96a5fa4ede 100644
--- a/lxd/main_forkexec.go
+++ b/lxd/main_forkexec.go
@@ -27,6 +27,7 @@ import (
#include "include/macro.h"
#include "include/memory_utils.h"
+#include "include/syscall_wrappers.h"
#include <lxc/attach_options.h>
#include <lxc/lxccontainer.h>
@@ -289,7 +290,11 @@ __attribute__ ((noinline)) static int __forkexec(void)
if (!argvp || !*argvp)
return log_error(EXIT_FAILURE, "No command specified");
- ret = close_inherited(fds_to_ignore, ARRAY_SIZE(fds_to_ignore));
+	ret = close_range(EXEC_PIPE_FD + 1, UINT_MAX, CLOSE_RANGE_UNSHARE);
+	if (ret) {
+		/*
+		 * Fall back to close_inherited() when the kernel has no
+		 * close_range() at all (ENOSYS) or has it but rejects
+		 * CLOSE_RANGE_UNSHARE (EINVAL, e.g. a partial backport).
+		 * Any other error leaves ret non-zero for the check that follows.
+		 */
+		if (errno == ENOSYS || errno == EINVAL)
+			ret = close_inherited(fds_to_ignore, ARRAY_SIZE(fds_to_ignore));
+	}
if (ret)
return log_error(EXIT_FAILURE, "Aborting attach to prevent leaking file descriptors into container");
diff --git a/lxd/sys/os.go b/lxd/sys/os.go
index b7faa7d4e3..e18d917ed1 100644
--- a/lxd/sys/os.go
+++ b/lxd/sys/os.go
@@ -63,6 +63,7 @@ type OS struct {
CGInfo cgroup.Info
// Kernel features
+ CloseRange bool
NativeTerminals bool
NetnsGetifaddrs bool
PidFds bool
More information about the lxc-devel
mailing list