[lxc-devel] [lxcfs/master] lxcfs: add --pidfd option

brauner on Github lxc-bot at linuxcontainers.org
Wed Feb 26 13:09:02 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 434 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200226/ed999722/attachment.bin>
-------------- next part --------------
From 20400f482bb66a5312a2dfaa2be4220184f590e5 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 26 Feb 2020 14:08:08 +0100
Subject: [PATCH] lxcfs: add --pidfd option

This makes running lxcfs way more reliable. It is opt-in for now.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 Makefile.am  |   2 +-
 bindings.c   | 210 +++++++++++++++++++++++++++++++--------------------
 bindings.h   |   5 +-
 configure.ac |  10 +++
 lxcfs.c      |   4 +
 utils.h      |  23 ++++++
 6 files changed, 171 insertions(+), 83 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 41f3249..5f64cfb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -7,7 +7,7 @@ AM_CFLAGS = -Wall -ggdb -D_GNU_SOURCE -DSBINDIR=\"$(SBINDIR)\" -pthread
 AM_CFLAGS += $(FUSE_CFLAGS)
 AM_CFLAGS += -DLIBDIR=\"$(LIBDIR)\"
 AM_LDFLAGS = $(FUSE_LIBS) -pthread
-#AM_CFLAGS += -DDEBUG
+AM_CFLAGS += -DDEBUG
 #AM_CFLAGS += -DVERBOSE
 
 AM_CFLAGS += -DRUNTIME_PATH=\"$(RUNTIME_PATH)\"
diff --git a/bindings.c b/bindings.c
index 7bf5d28..f5a6415 100644
--- a/bindings.c
+++ b/bindings.c
@@ -53,6 +53,8 @@
 #include "proc_cpuview.h"
 #include "utils.h"
 
+static bool can_use_pidfd;
+
 /* Define pivot_root() if missing from the C library */
 #ifndef HAVE_PIVOT_ROOT
 static int pivot_root(const char *new_root, const char *put_old)
@@ -83,9 +85,10 @@ extern int pivot_root(const char *new_root, const char *put_old);
  *	 cached initpid.
  */
 struct pidns_init_store {
-	ino_t ino;          // inode number for /proc/$pid/ns/pid
-	pid_t initpid;      // the pid of nit in that ns
-	long int ctime;     // the time at which /proc/$initpid was created
+	ino_t ino;     /* inode number for /proc/$pid/ns/pid */
+	pid_t initpid; /* the pid of nit in that ns */
+	int init_pidfd;
+	long int ctime; /* the time at which /proc/$initpid was created */
 	struct pidns_init_store *next;
 	long int lastcheck;
 };
@@ -127,47 +130,62 @@ static void store_unlock(void)
 	unlock_mutex(&pidns_store_mutex);
 }
 
+/* /proc/       =    6
+ *                +
+ * <pid-as-str> =   INTTYPE_TO_STRLEN(pid_t)
+ *                +
+ * \0           =    1
+ */
+#define LXCFS_PROC_PID_LEN \
+	(STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + +1)
+
 /* Must be called under store_lock */
-static bool initpid_still_valid(struct pidns_init_store *e, struct stat *nsfdsb)
+static bool initpid_still_valid(struct pidns_init_store *entry)
 {
-	struct stat initsb;
-	char fnam[100];
+	bool valid = true;
 
-	snprintf(fnam, 100, "/proc/%d", e->initpid);
-	if (stat(fnam, &initsb) < 0)
-		return false;
+	if (entry->init_pidfd >= 0) {
+		if (pidfd_send_signal(entry->init_pidfd, 0, NULL, 0))
+			valid = false;
+	} else {
+		struct stat st;
+		char path[LXCFS_PROC_PID_LEN];
 
-	lxcfs_debug("Comparing ctime %ld == %ld for pid %d.\n", e->ctime,
-		    initsb.st_ctime, e->initpid);
+		snprintf(path, sizeof(path), "/proc/%d", entry->initpid);
 
-	if (e->ctime != initsb.st_ctime)
-		return false;
-	return true;
+		if (stat(path, &st) || entry->ctime != st.st_ctime)
+			valid = false;
+	}
+
+	return valid;
 }
 
 /* Must be called under store_lock */
-static void remove_initpid(struct pidns_init_store *e)
+static void remove_initpid(struct pidns_init_store *entry)
 {
-	struct pidns_init_store *tmp;
-	int h;
+	struct pidns_init_store *it;
+	int ino_hash;
 
-	lxcfs_debug("Remove_initpid: removing entry for %d.\n", e->initpid);
+	lxcfs_debug("Removing cached entry for pid %d from init pid cache",
+		    entry->initpid);
 
-	h = HASH(e->ino);
-	if (pidns_hash_table[h] == e) {
-		pidns_hash_table[h] = e->next;
-		free_disarm(e);
+	ino_hash = HASH(entry->ino);
+	if (pidns_hash_table[ino_hash] == entry) {
+		pidns_hash_table[ino_hash] = entry->next;
+		close_prot_errno_disarm(entry->init_pidfd);
+		free_disarm(entry);
 		return;
 	}
 
-	tmp = pidns_hash_table[h];
-	while (tmp) {
-		if (tmp->next == e) {
-			tmp->next = e->next;
-			free_disarm(e);
+	it = pidns_hash_table[ino_hash];
+	while (it) {
+		if (it->next == entry) {
+			it->next = entry->next;
+			close_prot_errno_disarm(entry->init_pidfd);
+			free_disarm(entry);
 			return;
 		}
-		tmp = tmp->next;
+		it = it->next;
 	}
 }
 
@@ -176,39 +194,39 @@ static void remove_initpid(struct pidns_init_store *e)
 static void prune_initpid_store(void)
 {
 	static long int last_prune = 0;
-	struct pidns_init_store *e, *prev, *delme;
 	long int now, threshold;
-	int i;
 
 	if (!last_prune) {
 		last_prune = time(NULL);
 		return;
 	}
+
 	now = time(NULL);
 	if (now < last_prune + PURGE_SECS)
 		return;
 
-	lxcfs_debug("%s\n", "Pruning.");
+	lxcfs_debug("Pruning init pid cache");
 
 	last_prune = now;
 	threshold = now - 2 * PURGE_SECS;
 
-	for (i = 0; i < PIDNS_HASH_SIZE; i++) {
-		for (prev = NULL, e = pidns_hash_table[i]; e; ) {
-			if (e->lastcheck < threshold) {
+	for (int i = 0; i < PIDNS_HASH_SIZE; i++) {
+		for (struct pidns_init_store *entry = pidns_hash_table[i], *prev = NULL; entry;) {
+			if (entry->lastcheck < threshold) {
+				struct pidns_init_store *cur = entry;
 
-				lxcfs_debug("Removing cached entry for %d.\n", e->initpid);
+				lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid);
 
-				delme = e;
 				if (prev)
-					prev->next = e->next;
+					prev->next = entry->next;
 				else
-					pidns_hash_table[i] = e->next;
-				e = e->next;
-				free_disarm(delme);
+					pidns_hash_table[i] = entry->next;
+				entry = entry->next;
+				close_prot_errno_disarm(cur->init_pidfd);
+				free_disarm(cur);
 			} else {
-				prev = e;
-				e = e->next;
+				prev = entry;
+				entry = entry->next;
 			}
 		}
 	}
@@ -217,26 +235,37 @@ static void prune_initpid_store(void)
 /* Must be called under store_lock */
 static void save_initpid(struct stat *sb, pid_t pid)
 {
-	struct pidns_init_store *e;
-	char fpath[100];
-	struct stat procsb;
-	int h;
+	__do_free struct pidns_init_store *e = NULL;
+	__do_close_prot_errno int pidfd = -EBADF;
+	char path[LXCFS_PROC_PID_LEN];
+	struct lxcfs_opts *opts = fuse_get_context()->private_data;
+	struct stat st;
+	int ino_hash;
+
+	if (opts->use_pidfd && can_use_pidfd) {
+		pidfd = pidfd_open(pid, 0);
+		if (pidfd < 0)
+			return;
+	}
 
-	lxcfs_debug("Save_initpid: adding entry for %d.\n", pid);
+	snprintf(path, sizeof(path), "/proc/%d", pid);
+	if (stat(path, &st))
+		return;
 
-	snprintf(fpath, 100, "/proc/%d", pid);
-	if (stat(fpath, &procsb) < 0)
+	e = malloc(sizeof(*e));
+	if (!e)
 		return;
-	do {
-		e = malloc(sizeof(*e));
-	} while (!e);
+
 	e->ino = sb->st_ino;
 	e->initpid = pid;
-	e->ctime = procsb.st_ctime;
-	h = HASH(e->ino);
-	e->next = pidns_hash_table[h];
+	e->ctime = st.st_ctime;
+	ino_hash = HASH(e->ino);
+	e->next = pidns_hash_table[ino_hash];
 	e->lastcheck = time(NULL);
-	pidns_hash_table[h] = e;
+	e->init_pidfd = move_fd(pidfd);
+	pidns_hash_table[ino_hash] = move_ptr(e);
+
+	lxcfs_debug("Added cache entry %d for pid %d to init pid cache", ino_hash, pid);
 }
 
 /*
@@ -248,19 +277,19 @@ static void save_initpid(struct stat *sb, pid_t pid)
  */
 static struct pidns_init_store *lookup_verify_initpid(struct stat *sb)
 {
-	int h = HASH(sb->st_ino);
-	struct pidns_init_store *e = pidns_hash_table[h];
-
-	while (e) {
-		if (e->ino == sb->st_ino) {
-			if (initpid_still_valid(e, sb)) {
-				e->lastcheck = time(NULL);
-				return e;
+	struct pidns_init_store *entry = pidns_hash_table[HASH(sb->st_ino)];
+
+	while (entry) {
+		if (entry->ino == sb->st_ino) {
+			if (initpid_still_valid(entry)) {
+				entry->lastcheck = time(NULL);
+				return entry;
 			}
-			remove_initpid(e);
+
+			remove_initpid(entry);
 			return NULL;
 		}
-		e = e->next;
+		entry = entry->next;
 	}
 
 	return NULL;
@@ -358,31 +387,42 @@ static pid_t get_init_pid_for_task(pid_t task)
 	return ret;
 }
 
-pid_t lookup_initpid_in_store(pid_t qpid)
+#define LXCFS_PROC_PID_NS_LEN                                    \
+	(STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + \
+	 STRLITERALLEN("/ns/pid") + 1)
+
+pid_t lookup_initpid_in_store(pid_t pid)
 {
 	pid_t answer = 0;
-	struct stat sb;
-	struct pidns_init_store *e;
-	char fnam[100];
+	char path[LXCFS_PROC_PID_NS_LEN];
+	struct stat st;
+	struct pidns_init_store *entry;
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/pid", pid);
 
-	snprintf(fnam, 100, "/proc/%d/ns/pid", qpid);
 	store_lock();
-	if (stat(fnam, &sb) < 0)
+	if (stat(path, &st))
 		goto out;
-	e = lookup_verify_initpid(&sb);
-	if (e) {
-		answer = e->initpid;
+
+	entry = lookup_verify_initpid(&st);
+	if (entry) {
+		answer = entry->initpid;
 		goto out;
 	}
-	answer = get_init_pid_for_task(qpid);
+
+	answer = get_init_pid_for_task(pid);
 	if (answer > 0)
-		save_initpid(&sb, answer);
+		save_initpid(&st, answer);
 
 out:
-	/* we prune at end in case we are returning
-	 * the value we were about to return */
+	/*
+	 * Prune at the end in case we're returning the value we were about to
+	 * return.
+	 */
 	prune_initpid_store();
+
 	store_unlock();
+
 	return answer;
 }
 
@@ -663,8 +703,9 @@ static bool cgfs_setup_controllers(void)
 
 static void __attribute__((constructor)) lxcfs_init(void)
 {
-	__do_close_prot_errno int init_ns = -EBADF;
+	__do_close_prot_errno int init_ns = -EBADF, pidfd = -EBADF;
 	int i = 0;
+	pid_t pid;
 	char *cret;
 	char cwd[MAXPATHLEN];
 
@@ -673,7 +714,8 @@ static void __attribute__((constructor)) lxcfs_init(void)
 		log_exit("Failed to initialize cgroup support");
 
 	/* Preserve initial namespace. */
-	init_ns = preserve_ns(getpid(), "mnt");
+	pid = getpid();
+	init_ns = preserve_ns(pid, "mnt");
 	if (init_ns < 0)
 		log_exit("Failed to preserve initial mount namespace");
 
@@ -702,6 +744,12 @@ static void __attribute__((constructor)) lxcfs_init(void)
 		__do_free char *controllers = lxc_string_join(",", (const char **)(*h)->controllers, false);
 		fprintf(stderr, " %2d: fd: %3d: %s\n", i, (*h)->fd, controllers ?: "");
 	}
+
+	pidfd = pidfd_open(pid, 0);
+	if (pidfd >= 0 && pidfd_send_signal(pidfd, 0, NULL, 0) == 0) {
+		can_use_pidfd = true;
+		lxcfs_error("Kernel supports pidfds");
+	}
 }
 
 static void __attribute__((destructor)) lxcfs_exit(void)
diff --git a/bindings.h b/bindings.h
index 8e330ec..777fcd2 100644
--- a/bindings.h
+++ b/bindings.h
@@ -14,15 +14,16 @@
 #define _FILE_OFFSET_BITS 64
 
 #include <fuse.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "cgroup_fuse.h"
 #include "config.h"
 #include "macro.h"
-#include "cgroup_fuse.h"
 #include "proc_cpuview.h"
 #include "proc_fuse.h"
 #include "proc_loadavg.h"
@@ -64,9 +65,11 @@ struct file_info {
 
 struct lxcfs_opts {
 	bool swap_off;
+	bool use_pidfd;
 };
 
 extern pid_t lookup_initpid_in_store(pid_t qpid);
 extern void prune_init_slice(char *cg);
+extern bool supports_pidfd(void);
 
 #endif /* __LXCFS_BINDINGS_H */
diff --git a/configure.ac b/configure.ac
index 63cd934..e22cf3c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -171,4 +171,14 @@ AC_CHECK_FUNCS([strlcat],
 	AC_DEFINE(HAVE_STRLCAT,1,[Have strlcat]),
 	AM_CONDITIONAL(HAVE_STRLCAT, false))
 
+AC_CHECK_FUNCS([pidfd_open],
+	AM_CONDITIONAL(HAVE_PIDFD_OPEN, true)
+	AC_DEFINE(HAVE_PIDFD_OPEN,1,[Supports pidfd_open]),
+	AM_CONDITIONAL(HAVE_PIDFD_OPEN, false))
+
+AC_CHECK_FUNCS([pidfd_send_signal],
+	AM_CONDITIONAL(HAVE_PIDFD_SEND_SIGNAL, true)
+	AC_DEFINE(HAVE_PIDFD_SEND_SIGNAL,1,[Supports pidfd_send_signal]),
+	AM_CONDITIONAL(HAVE_PIDFD_SEND_SIGNAL, false))
+
 AC_OUTPUT
diff --git a/lxcfs.c b/lxcfs.c
index ce5a24b..d825303 100644
--- a/lxcfs.c
+++ b/lxcfs.c
@@ -1102,6 +1102,7 @@ int main(int argc, char *argv[])
 		goto out;
 	}
 	opts->swap_off = false;
+	opts->use_pidfd = false;
 
 	/* accomodate older init scripts */
 	swallow_arg(&argc, argv, "-s");
@@ -1112,6 +1113,9 @@ int main(int argc, char *argv[])
 	if (swallow_arg(&argc, argv, "-u"))
 		opts->swap_off = true;
 
+	if (swallow_arg(&argc, argv, "--pidfd"))
+		opts->use_pidfd = true;
+
 	if (swallow_option(&argc, argv, "-o", &v)) {
 		/* Parse multiple values */
 		for (; (token = strtok_r(v, ",", &saveptr)); v = NULL) {
diff --git a/utils.h b/utils.h
index 6c874b4..d5673ed 100644
--- a/utils.h
+++ b/utils.h
@@ -46,4 +46,27 @@ extern int read_file_fuse(const char *path, char *buf, size_t size,
 extern void prune_init_slice(char *cg);
 extern int wait_for_pid(pid_t pid);
 
+#ifndef HAVE_PIDFD_OPEN
+static inline int pidfd_open(pid_t pid, unsigned int flags)
+{
+#ifdef __NR_pidfd_open
+	return syscall(__NR_pidfd_open, pid, flags);
+#else
+	return ret_errno(ENOSYS);
+#endif
+}
+#endif
+
+#ifndef HAVE_PIDFD_SEND_SIGNAL
+static inline int pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
+				    unsigned int flags)
+{
+#ifdef __NR_pidfd_send_signal
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+#else
+	return ret_errno(ENOSYS);
+#endif
+}
+#endif
+
 #endif /* __LXCFS_UTILS_H */


More information about the lxc-devel mailing list