[lxc-devel] [lxc/master] clone: add infrastructure for CLONE_PIDFD

brauner on Github lxc-bot at linuxcontainers.org
Thu May 9 13:03:01 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 486 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190509/a4c9ba23/attachment.bin>
-------------- next part --------------
From b52e8e68a61866da2af86e85905ec850f8a8b7fc Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 9 May 2019 15:00:32 +0200
Subject: [PATCH 1/2] raw_syscalls: simplify assembly

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
Co-developed-by: David Howells <dhowells at redhat.com>
Signed-off-by: David Howells <dhowells at redhat.com>
---
 src/lxc/raw_syscalls.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/lxc/raw_syscalls.c b/src/lxc/raw_syscalls.c
index a16f6edf76..bbf5409b0f 100644
--- a/src/lxc/raw_syscalls.c
+++ b/src/lxc/raw_syscalls.c
@@ -56,31 +56,41 @@ __returns_twice pid_t lxc_raw_clone(unsigned long flags)
 		 * boolean flag whether this is the child or the parent in %o1.
 		 * Inline assembly is needed to get the flag returned in %o1.
 		 */
-		int child_pid, in_child, ret;
+		register long g1 asm("g1") = __NR_clone;
+		register long o0 asm("o0") = flags | SIGCHLD;
+		register long o1 asm("o1") = 0; /* is parent/child indicator */
+		long is_error, retval, in_child;
+		pid_t child_pid;
 
-                asm volatile("mov %3, %%g1\n\t"
-                             "mov %4, %%o0\n\t"
-                             "mov 0 , %%o1\n\t"
+		asm volatile(
 #if defined(__arch64__)
-                             "t 0x6d\n\t"
+		    "t 0x6d\n\t" /* 64-bit trap */
 #else
-                             "t 0x10\n\t"
+		    "t 0x10\n\t" /* 32-bit trap */
 #endif
-                             "addx %%g0, 0, %2\n\t"
-                             "mov %%o1, %0\n\t"
-                             "mov %%o0, %1" :
-                             "=r"(in_child), "=r"(child_pid), "=r"(ret) :
-                             "i"(__NR_clone), "r"(flags | SIGCHLD) :
-                             "%o1", "%o0", "%g1", "cc" );
+		    /*
+		     * catch errors: On sparc, the carry bit (csr) in the
+		     * processor status register (psr) is used instead of a
+		     * full register.
+		     */
+		    "addx %%g0, 0, %g1"
+		    : "=r"(g1), "=r"(o0), "=r"(o1) /* outputs */
+		    : "r"(g1), "r"(o0), "r"(o1)    /* inputs */
+		    : "%cc");		           /* clobbers */
 
-		if (ret) {
-			errno = child_pid;
+		is_error = g1;
+		retval = o0;
+		in_child = o1;
+
+		if (is_error) {
+			errno = retval;
 			return -1;
 		}
 
 		if (in_child)
 			return 0;
 
+		child_pid = retval;
 		return child_pid;
 	}
 #elif defined(__ia64__)

From a59440be0901362f64e600cc570c8efe41ed4995 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 9 May 2019 15:01:27 +0200
Subject: [PATCH 2/2] clone: add infrastructure for CLONE_PIDFD

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eac7078a0fff1e72cf2b641721e3f55ec7e5e21e

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/attach.c          |  2 +-
 src/lxc/conf.c            |  2 +-
 src/lxc/raw_syscalls.c    | 20 +++++++++++---------
 src/lxc/raw_syscalls.h    | 10 ++++++++--
 src/lxc/start.c           |  4 ++--
 src/lxc/utils.c           |  2 +-
 src/tests/lxc_raw_clone.c | 26 +++++++++++++-------------
 7 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/src/lxc/attach.c b/src/lxc/attach.c
index 9d37793e5c..ce51352c67 100644
--- a/src/lxc/attach.c
+++ b/src/lxc/attach.c
@@ -1426,7 +1426,7 @@ int lxc_attach(const char *name, const char *lxcpath,
 	payload.exec_function = exec_function;
 	payload.exec_payload = exec_payload;
 
-	pid = lxc_raw_clone(CLONE_PARENT);
+	pid = lxc_raw_clone(CLONE_PARENT, NULL);
 	if (pid < 0) {
 		SYSERROR("Failed to clone attached process");
 		shutdown(ipc_sockets[1], SHUT_RDWR);
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index ec9543f743..2515c881ef 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -4337,7 +4337,7 @@ int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data,
 	d.p[1] = p[1];
 
 	/* Clone child in new user namespace. */
-	pid = lxc_raw_clone_cb(run_userns_fn, &d, CLONE_NEWUSER);
+	pid = lxc_raw_clone_cb(run_userns_fn, &d, CLONE_NEWUSER, NULL);
 	if (pid < 0) {
 		ERROR("Failed to clone process in new user namespace");
 		goto on_error;
diff --git a/src/lxc/raw_syscalls.c b/src/lxc/raw_syscalls.c
index bbf5409b0f..f58b8d8987 100644
--- a/src/lxc/raw_syscalls.c
+++ b/src/lxc/raw_syscalls.c
@@ -33,7 +33,7 @@ int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
  * The nice thing about this is that we get fork() behavior. That is
  * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
  */
-__returns_twice pid_t lxc_raw_clone(unsigned long flags)
+__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd)
 {
 	/*
 	 * These flags don't interest at all so we don't jump through any hoops
@@ -48,7 +48,7 @@ __returns_twice pid_t lxc_raw_clone(unsigned long flags)
 	/* On s390/s390x and cris the order of the first and second arguments
 	 * of the system call is reversed.
 	 */
-	return syscall(__NR_clone, NULL, flags | SIGCHLD);
+	return syscall(__NR_clone, NULL, flags | SIGCHLD, pidfd);
 #elif defined(__sparc__) && defined(__arch64__)
 	{
 		/*
@@ -59,6 +59,7 @@ __returns_twice pid_t lxc_raw_clone(unsigned long flags)
 		register long g1 asm("g1") = __NR_clone;
 		register long o0 asm("o0") = flags | SIGCHLD;
 		register long o1 asm("o1") = 0; /* is parent/child indicator */
+		register long o2 asm("o2") = (unsigned long)pidfd;
 		long is_error, retval, in_child;
 		pid_t child_pid;
 
@@ -74,9 +75,9 @@ __returns_twice pid_t lxc_raw_clone(unsigned long flags)
 		     * full register.
 		     */
 		    "addx %%g0, 0, %g1"
-		    : "=r"(g1), "=r"(o0), "=r"(o1) /* outputs */
-		    : "r"(g1), "r"(o0), "r"(o1)    /* inputs */
-		    : "%cc");		           /* clobbers */
+		    : "=r"(g1), "=r"(o0), "=r"(o1), "=r"(o2) /* outputs */
+		    : "r"(g1), "r"(o0), "r"(o1), "r"(o2)     /* inputs */
+		    : "%cc");				     /* clobbers */
 
 		is_error = g1;
 		retval = o0;
@@ -95,17 +96,18 @@ __returns_twice pid_t lxc_raw_clone(unsigned long flags)
 	}
 #elif defined(__ia64__)
 	/* On ia64 the stack and stack size are passed as separate arguments. */
-	return syscall(__NR_clone, flags | SIGCHLD, NULL, prctl_arg(0));
+	return syscall(__NR_clone, flags | SIGCHLD, NULL, prctl_arg(0), pidfd);
 #else
-	return syscall(__NR_clone, flags | SIGCHLD, NULL);
+	return syscall(__NR_clone, flags | SIGCHLD, NULL, pidfd);
 #endif
 }
 
-pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags)
+pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags,
+		       int *pidfd)
 {
 	pid_t pid;
 
-	pid = lxc_raw_clone(flags);
+	pid = lxc_raw_clone(flags, pidfd);
 	if (pid < 0)
 		return -1;
 
diff --git a/src/lxc/raw_syscalls.h b/src/lxc/raw_syscalls.h
index 6c27f26a0b..82ff47dfd8 100644
--- a/src/lxc/raw_syscalls.h
+++ b/src/lxc/raw_syscalls.h
@@ -30,6 +30,11 @@
 #include <sys/syscall.h>
 #include <unistd.h>
 
+/* clone */
+#ifndef CLONE_PIDFD
+#define CLONE_PIDFD 0x00001000
+#endif
+
 /*
  * lxc_raw_clone() - create a new process
  *
@@ -57,7 +62,7 @@
  * - must call lxc_raw_getpid():
  *   The child must use lxc_raw_getpid() to retrieve its pid.
  */
-extern pid_t lxc_raw_clone(unsigned long flags);
+extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd);
 
 /*
  * lxc_raw_clone_cb() - create a new process
@@ -70,7 +75,8 @@ extern pid_t lxc_raw_clone(unsigned long flags);
  * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb()
  * as well.
  */
-extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags);
+extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args,
+			      unsigned long flags, int *pidfd);
 
 extern int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
 			    char *const envp[], int flags);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 651511dbe3..34798292cf 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1601,7 +1601,7 @@ static inline int do_share_ns(void *arg)
 
 	flags = handler->ns_on_clone_flags;
 	flags |= CLONE_PARENT;
-	handler->pid = lxc_raw_clone_cb(do_start, handler, flags);
+	handler->pid = lxc_raw_clone_cb(do_start, handler, flags, NULL);
 	if (handler->pid < 0)
 		return -1;
 
@@ -1748,7 +1748,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 		}
 	} else {
 		handler->pid = lxc_raw_clone_cb(do_start, handler,
-						handler->ns_on_clone_flags);
+						handler->ns_on_clone_flags, NULL);
 	}
 	if (handler->pid < 0) {
 		SYSERROR(LXC_CLONE_ERROR);
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index ea081c566c..331e216793 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -1609,7 +1609,7 @@ int run_command_internal(char *buf, size_t buf_size, int (*child_fn)(void *), vo
 		return -1;
 	}
 
-	child = lxc_raw_clone(0);
+	child = lxc_raw_clone(0, NULL);
 	if (child < 0) {
 		close(pipefd[0]);
 		close(pipefd[1]);
diff --git a/src/tests/lxc_raw_clone.c b/src/tests/lxc_raw_clone.c
index 63a8361764..655454f395 100644
--- a/src/tests/lxc_raw_clone.c
+++ b/src/tests/lxc_raw_clone.c
@@ -48,42 +48,42 @@ int main(int argc, char *argv[])
 	pid_t pid;
 	int flags = 0;
 
-	pid = lxc_raw_clone(CLONE_PARENT_SETTID);
+	pid = lxc_raw_clone(CLONE_PARENT_SETTID, NULL);
 	if (pid >= 0 || pid != -EINVAL) {
 		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_PARENT_SETTID) "
 				  "should not be possible");
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(CLONE_CHILD_SETTID);
+	pid = lxc_raw_clone(CLONE_CHILD_SETTID, NULL);
 	if (pid >= 0 || pid != -EINVAL) {
 		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_CHILD_SETTID) "
 				  "should not be possible");
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(CLONE_CHILD_CLEARTID);
+	pid = lxc_raw_clone(CLONE_CHILD_CLEARTID, NULL);
 	if (pid >= 0 || pid != -EINVAL) {
 		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_CHILD_CLEARTID) "
 				  "should not be possible");
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(CLONE_SETTLS);
+	pid = lxc_raw_clone(CLONE_SETTLS, NULL);
 	if (pid >= 0 || pid != -EINVAL) {
 		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_SETTLS) should "
 				  "not be possible");
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(CLONE_VM);
+	pid = lxc_raw_clone(CLONE_VM, NULL);
 	if (pid >= 0 || pid != -EINVAL) {
 		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_VM) should "
 			  "not be possible");
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(0);
+	pid = lxc_raw_clone(0, NULL);
 	if (pid < 0) {
 		lxc_error("%s\n", "Failed to call lxc_raw_clone(0)");
 		exit(EXIT_FAILURE);
@@ -100,7 +100,7 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(0);
+	pid = lxc_raw_clone(0, NULL);
 	if (pid < 0) {
 		lxc_error("%s\n", "Failed to call lxc_raw_clone(0)");
 		exit(EXIT_FAILURE);
@@ -127,7 +127,7 @@ int main(int argc, char *argv[])
 	flags |= CLONE_NEWPID;
 	flags |= CLONE_NEWUTS;
 
-	pid = lxc_raw_clone(flags);
+	pid = lxc_raw_clone(flags, NULL);
 	if (pid < 0) {
 		lxc_error("%s\n", "Failed to call lxc_raw_clone(CLONE_NEWUSER "
 				  "| CLONE_NEWCGROUP | CLONE_NEWNS | "
@@ -147,7 +147,7 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(flags);
+	pid = lxc_raw_clone(flags, NULL);
 	if (pid < 0) {
 		lxc_error("%s\n", "Failed to call lxc_raw_clone(CLONE_NEWUSER "
 				  "| CLONE_NEWCGROUP | CLONE_NEWNS | "
@@ -168,7 +168,7 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(CLONE_VFORK);
+	pid = lxc_raw_clone(CLONE_VFORK, NULL);
 	if (pid < 0) {
 		lxc_error("%s\n", "Failed to call lxc_raw_clone(CLONE_VFORK);");
 		exit(EXIT_FAILURE);
@@ -185,7 +185,7 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(CLONE_VFORK);
+	pid = lxc_raw_clone(CLONE_VFORK, NULL);
 	if (pid < 0) {
 		lxc_error("%s\n", "Failed to call lxc_raw_clone(CLONE_VFORK);");
 		exit(EXIT_FAILURE);
@@ -202,7 +202,7 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(CLONE_FILES);
+	pid = lxc_raw_clone(CLONE_FILES, NULL);
 	if (pid < 0) {
 		lxc_error("%s\n", "Failed to call lxc_raw_clone(CLONE_FILES);");
 		exit(EXIT_FAILURE);
@@ -219,7 +219,7 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
-	pid = lxc_raw_clone(CLONE_FILES);
+	pid = lxc_raw_clone(CLONE_FILES, NULL);
 	if (pid < 0) {
 		lxc_error("%s\n", "Failed to call lxc_raw_clone(CLONE_FILES);");
 		exit(EXIT_FAILURE);


More information about the lxc-devel mailing list