[lxc-devel] [lxc/master] clone3: add infrastructure and switch container creation to it

brauner on Github lxc-bot at linuxcontainers.org
Fri May 15 12:43:39 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200515/1d730732/attachment-0001.bin>
-------------- next part --------------
From 78ae61d86cd502df5f757ba4ba5cf5304aad55d2 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 15 May 2020 11:44:46 +0200
Subject: [PATCH 1/5] syscall_numbers: handle ia64 syscall numbers correctly

They are offset by 1024.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/syscall_numbers.h | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/lxc/syscall_numbers.h b/src/lxc/syscall_numbers.h
index e7a9dd9a4f..c4be407a47 100644
--- a/src/lxc/syscall_numbers.h
+++ b/src/lxc/syscall_numbers.h
@@ -40,7 +40,7 @@
 	#elif defined __sparc__
 		#define __NR_keyctl 283
 	#elif defined __ia64__
-		#define __NR_keyctl 249
+		#define __NR_keyctl (249 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_keyctl 4282
@@ -112,7 +112,7 @@
 	#elif defined __sparc__
 		#define __NR_pivot_root 146
 	#elif defined __ia64__
-		#define __NR_pivot_root 183
+		#define __NR_pivot_root (183 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_pivot_root 4216
@@ -147,7 +147,7 @@
 	#elif defined __sparc__
 		#define __NR_setns 337
 	#elif defined __ia64__
-		#define __NR_setns 306
+		#define __NR_setns (306 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_setns 4344
@@ -182,7 +182,7 @@
 	#elif defined __sparc__
 		#define __NR_sethostname 88
 	#elif defined __ia64__
-		#define __NR_sethostname 59
+		#define __NR_sethostname (59 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_sethostname 474
@@ -217,7 +217,7 @@
 	#elif defined __sparc__
 		#define __NR_signalfd 311
 	#elif defined __ia64__
-		#define __NR_signalfd 283
+		#define __NR_signalfd (283 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_signalfd 4317
@@ -252,7 +252,7 @@
 	#elif defined __sparc__
 		#define __NR_signalfd4 317
 	#elif defined __ia64__
-		#define __NR_signalfd4 289
+		#define __NR_signalfd4 (289 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_signalfd4 4324
@@ -287,7 +287,7 @@
 	#elif defined __sparc__
 		#define __NR_unshare 299
 	#elif defined __ia64__
-		#define __NR_unshare 272
+		#define __NR_unshare (272 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_unshare 4303
@@ -322,7 +322,7 @@
 	#elif defined __sparc__
 		#define __NR_bpf 349
 	#elif defined __ia64__
-		#define __NR_bpf 317
+		#define __NR_bpf (317 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_bpf 4355
@@ -357,7 +357,7 @@
 	#elif defined __sparc__
 		#define __NR_faccessat 296
 	#elif defined __ia64__
-		#define __NR_faccessat 269
+		#define __NR_faccessat (269 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_faccessat 4300
@@ -387,6 +387,8 @@
 		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
 			#define __NR_pidfd_send_signal 5424
 		#endif
+	#elif defined __ia64__
+		#define __NR_pidfd_send_signal (424 + 1024)
 	#else
 		#define __NR_pidfd_send_signal 424
 	#endif
@@ -410,7 +412,7 @@
 	#elif defined __sparc__
 		#define __NR_seccomp 346
 	#elif defined __ia64__
-		#define __NR_seccomp 329
+		#define __NR_seccomp (329 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_seccomp 4352
@@ -445,7 +447,7 @@
 	#elif defined __sparc__
 		#define __NR_gettid 143
 	#elif defined __ia64__
-		#define __NR_gettid 81
+		#define __NR_gettid (81 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_gettid 4222
@@ -484,7 +486,7 @@
 	#elif defined __sparc__
 		#define __NR_execveat 350
 	#elif defined __ia64__
-		#define __NR_execveat 318
+		#define __NR_execveat (318 + 1024)
 	#elif defined _MIPS_SIM
 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
 			#define __NR_execveat 4356
@@ -514,6 +516,8 @@
 		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
 			#define __NR_move_mount 5429
 		#endif
+	#elif defined __ia64__
+		#define __NR_move_mount (429 + 1024)
 	#else
 		#define __NR_move_mount 429
 	#endif
@@ -532,6 +536,8 @@
 		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
 			#define __NR_open_tree 5428
 		#endif
+	#elif defined __ia64__
+		#define __NR_open_tree (428 + 1024)
 	#else
 		#define __NR_open_tree 428
 	#endif

From 923d3a2dba12dee9a543af4757f1c37f83007a00 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 15 May 2020 11:48:25 +0200
Subject: [PATCH 2/5] syscall_numbers: add clone3()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/syscall_numbers.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/lxc/syscall_numbers.h b/src/lxc/syscall_numbers.h
index c4be407a47..bfd0e57ab9 100644
--- a/src/lxc/syscall_numbers.h
+++ b/src/lxc/syscall_numbers.h
@@ -543,4 +543,24 @@
 	#endif
 #endif
 
+#ifndef __NR_clone3
+	#if defined __alpha__
+		#define __NR_clone3 545
+	#elif defined _MIPS_SIM
+		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
+			#define __NR_clone3 4435
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
+			#define __NR_clone3 6435
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
+			#define __NR_clone3 5435
+		#endif
+	#elif defined __ia64__
+		#define __NR_clone3 (435 + 1024)
+	#else
+		#define __NR_clone3 435
+	#endif
+#endif
+
 #endif /* __LXC_SYSCALL_NUMBERS_H */

From f40988c7736333b67f55dcc9e3a3340f7793a16f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 15 May 2020 12:32:28 +0200
Subject: [PATCH 3/5] process_utils: introduce new process_utils.{c,h}

This will be the central place for all process management helpers. This also
removes raw_syscalls.{c,h}.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 configure.ac                                |   2 +-
 src/include/fexecve.c                       |   2 +-
 src/lxc/Makefile.am                         |  10 +-
 src/lxc/af_unix.c                           |   2 +-
 src/lxc/attach.c                            |   2 +-
 src/lxc/cmd/lxc_init.c                      |   2 +-
 src/lxc/cmd/lxc_monitord.c                  |   2 +-
 src/lxc/cmd/lxc_user_nic.c                  |   2 +-
 src/lxc/conf.c                              |   4 +-
 src/lxc/execute.c                           |   4 +-
 src/lxc/lsm/apparmor.c                      |   2 +-
 src/lxc/lxccontainer.c                      |   2 +-
 src/lxc/namespace.c                         |  27 ---
 src/lxc/namespace.h                         |  90 -------
 src/lxc/network.c                           |   2 +-
 src/lxc/{raw_syscalls.c => process_utils.c} |  36 ++-
 src/lxc/process_utils.h                     | 248 ++++++++++++++++++++
 src/lxc/raw_syscalls.h                      |  94 --------
 src/lxc/rexec.c                             |   2 +-
 src/lxc/start.c                             |   2 +-
 src/lxc/utils.c                             |   2 +-
 src/lxc/utils.h                             |   2 +-
 src/tests/Makefile.am                       |   2 +-
 src/tests/lxc_raw_clone.c                   |   2 +-
 24 files changed, 303 insertions(+), 242 deletions(-)
 rename src/lxc/{raw_syscalls.c => process_utils.c} (82%)
 create mode 100644 src/lxc/process_utils.h
 delete mode 100644 src/lxc/raw_syscalls.h

diff --git a/configure.ac b/configure.ac
index c55810831e..7f589f9405 100644
--- a/configure.ac
+++ b/configure.ac
@@ -622,7 +622,7 @@ AC_CHECK_HEADER([ifaddrs.h],
 AC_HEADER_MAJOR
 
 # Check for some syscalls functions
-AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree])
+AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat])
 
 # Check for strerror_r() support. Defines:
 # - HAVE_STRERROR_R if available
diff --git a/src/include/fexecve.c b/src/include/fexecve.c
index 123f273098..0627cc802d 100644
--- a/src/include/fexecve.c
+++ b/src/include/fexecve.c
@@ -29,7 +29,7 @@
 #include <fcntl.h>
 #include "config.h"
 #include "macro.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 
 int fexecve(int fd, char *const argv[], char *const envp[])
 {
diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
index c374c2d0ac..d1e23647e0 100644
--- a/src/lxc/Makefile.am
+++ b/src/lxc/Makefile.am
@@ -27,7 +27,7 @@ noinst_HEADERS = api_extensions.h \
 		 memory_utils.h \
 		 monitor.h \
 		 namespace.h \
-		 raw_syscalls.h \
+		 process_utils.h \
 		 rexec.h \
 		 start.h \
 		 state.h \
@@ -128,7 +128,7 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \
 		    network.c network.h \
 		    monitor.c monitor.h \
 		    parse.c parse.h \
-		    raw_syscalls.c raw_syscalls.h \
+		    process_utils.c process_utils.h \
 		    ringbuf.c ringbuf.h \
 		    rtnl.c rtnl.h \
 		    state.c state.h \
@@ -384,7 +384,7 @@ init_lxc_SOURCES = cmd/lxc_init.c \
 		   initutils.c initutils.h \
 		   memory_utils.h \
 		   parse.c parse.h \
-		   raw_syscalls.c raw_syscalls.h \
+		   process_utils.c process_utils.h \
 		   syscall_numbers.h \
 		   string_utils.c string_utils.h
 
@@ -395,7 +395,7 @@ lxc_monitord_SOURCES = cmd/lxc_monitord.c \
 		       log.c log.h \
 		       mainloop.c mainloop.h \
 		       monitor.c monitor.h \
-		       raw_syscalls.c raw_syscalls.h \
+		       process_utils.c process_utils.h \
 		       syscall_numbers.h \
 		       utils.c utils.h
 lxc_user_nic_SOURCES = cmd/lxc_user_nic.c \
@@ -404,7 +404,7 @@ lxc_user_nic_SOURCES = cmd/lxc_user_nic.c \
 		       memory_utils.h \
 		       network.c network.h \
 		       parse.c parse.h \
-		       raw_syscalls.c raw_syscalls.h \
+		       process_utils.c process_utils.h \
 		       syscall_numbers.h \
 		       file_utils.c file_utils.h \
 		       string_utils.c string_utils.h \
diff --git a/src/lxc/af_unix.c b/src/lxc/af_unix.c
index bf626a109e..5cf54917f1 100644
--- a/src/lxc/af_unix.c
+++ b/src/lxc/af_unix.c
@@ -18,7 +18,7 @@
 #include "log.h"
 #include "macro.h"
 #include "memory_utils.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "utils.h"
 
 #ifndef HAVE_STRLCPY
diff --git a/src/lxc/attach.c b/src/lxc/attach.c
index bbf95bd5b6..57c7f46170 100644
--- a/src/lxc/attach.c
+++ b/src/lxc/attach.c
@@ -40,7 +40,7 @@
 #include "mainloop.h"
 #include "memory_utils.h"
 #include "namespace.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "syscall_wrappers.h"
 #include "terminal.h"
 #include "utils.h"
diff --git a/src/lxc/cmd/lxc_init.c b/src/lxc/cmd/lxc_init.c
index a52793343a..a03631f1a4 100644
--- a/src/lxc/cmd/lxc_init.c
+++ b/src/lxc/cmd/lxc_init.c
@@ -28,7 +28,7 @@
 #include "initutils.h"
 #include "memory_utils.h"
 #include "parse.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "string_utils.h"
 
 /* option keys for long only options */
diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c
index 7318df9542..da7db28207 100644
--- a/src/lxc/cmd/lxc_monitord.c
+++ b/src/lxc/cmd/lxc_monitord.c
@@ -28,7 +28,7 @@
 #include "log.h"
 #include "mainloop.h"
 #include "monitor.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "utils.h"
 
 #define CLIENTFDS_CHUNK 64
diff --git a/src/lxc/cmd/lxc_user_nic.c b/src/lxc/cmd/lxc_user_nic.c
index edb2d8f03f..4160565f36 100644
--- a/src/lxc/cmd/lxc_user_nic.c
+++ b/src/lxc/cmd/lxc_user_nic.c
@@ -36,7 +36,7 @@
 #include "memory_utils.h"
 #include "network.h"
 #include "parse.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "string_utils.h"
 #include "syscall_wrappers.h"
 #include "utils.h"
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index e2e2f9e97a..5cbca60006 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -51,7 +51,7 @@
 #include "namespace.h"
 #include "network.h"
 #include "parse.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "ringbuf.h"
 #include "start.h"
 #include "storage.h"
@@ -3245,7 +3245,7 @@ static bool verify_start_hooks(struct lxc_conf *conf)
 
 static bool execveat_supported(void)
 {
-	lxc_raw_execveat(-1, "", NULL, NULL, AT_EMPTY_PATH);
+	execveat(-1, "", NULL, NULL, AT_EMPTY_PATH);
 	if (errno == ENOSYS)
 		return false;
 
diff --git a/src/lxc/execute.c b/src/lxc/execute.c
index 7dd835862f..7175ef2cf2 100644
--- a/src/lxc/execute.c
+++ b/src/lxc/execute.c
@@ -14,7 +14,7 @@
 #include "config.h"
 #include "log.h"
 #include "start.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "utils.h"
 
 lxc_log_define(execute, start);
@@ -66,7 +66,7 @@ static int execute_start(struct lxc_handler *handler, void* data)
 	NOTICE("Exec'ing \"%s\"", my_args->argv[0]);
 
 	if (my_args->init_fd >= 0)
-		lxc_raw_execveat(my_args->init_fd, "", argv, environ, AT_EMPTY_PATH);
+		execveat(my_args->init_fd, "", argv, environ, AT_EMPTY_PATH);
 	else
 		execvp(argv[0], argv);
 	SYSERROR("Failed to exec %s", argv[0]);
diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
index 4fc18eb438..cef95c8542 100644
--- a/src/lxc/lsm/apparmor.c
+++ b/src/lxc/lsm/apparmor.c
@@ -19,7 +19,7 @@
 #include "log.h"
 #include "lsm.h"
 #include "parse.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "utils.h"
 
 lxc_log_define(apparmor, lsm);
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 48018fe329..9c9d023b87 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -49,7 +49,7 @@
 #include "namespace.h"
 #include "network.h"
 #include "parse.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "start.h"
 #include "state.h"
 #include "storage.h"
diff --git a/src/lxc/namespace.c b/src/lxc/namespace.c
index 38d2ae5d71..f2e0175630 100644
--- a/src/lxc/namespace.c
+++ b/src/lxc/namespace.c
@@ -21,33 +21,6 @@
 
 lxc_log_define(namespace, lxc);
 
-/*
- * Let's use the "standard stack limit" (i.e. glibc thread size default) for
- * stack sizes: 8MB.
- */
-#define __LXC_STACK_SIZE (8 * 1024 * 1024)
-pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd)
-{
-	pid_t ret;
-	void *stack;
-
-	stack = malloc(__LXC_STACK_SIZE);
-	if (!stack) {
-		SYSERROR("Failed to allocate clone stack");
-		return -ENOMEM;
-	}
-
-#ifdef __ia64__
-	ret = __clone2(fn, stack, __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd);
-#else
-	ret = clone(fn, stack + __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd);
-#endif
-	if (ret < 0)
-		SYSERROR("Failed to clone (%#x)", flags);
-
-	return ret;
-}
-
 /* Leave the user namespace at the first position in the array of structs so
  * that we always attach to it first when iterating over the struct and using
  * setns() to switch namespaces. This especially affects lxc_attach(): Suppose
diff --git a/src/lxc/namespace.h b/src/lxc/namespace.h
index a8fda783c3..84976f60f2 100644
--- a/src/lxc/namespace.h
+++ b/src/lxc/namespace.h
@@ -7,63 +7,6 @@
 #include <unistd.h>
 #include <sys/syscall.h>
 
-#ifndef CLONE_PARENT_SETTID
-#define CLONE_PARENT_SETTID 0x00100000
-#endif
-
-#ifndef CLONE_CHILD_CLEARTID
-#define CLONE_CHILD_CLEARTID 0x00200000
-#endif
-
-#ifndef CLONE_CHILD_SETTID
-#define CLONE_CHILD_SETTID 0x01000000
-#endif
-
-#ifndef CLONE_VFORK
-#define CLONE_VFORK 0x00004000
-#endif
-
-#ifndef CLONE_THREAD
-#define CLONE_THREAD 0x00010000
-#endif
-
-#ifndef CLONE_SETTLS
-#define CLONE_SETTLS 0x00080000
-#endif
-
-#ifndef CLONE_VM
-#define CLONE_VM 0x00000100
-#endif
-
-#ifndef CLONE_FILES
-#define CLONE_FILES 0x00000400
-#endif
-
-#ifndef CLONE_FS
-#  define CLONE_FS                0x00000200
-#endif
-#ifndef CLONE_NEWNS
-#  define CLONE_NEWNS             0x00020000
-#endif
-#ifndef CLONE_NEWCGROUP
-#  define CLONE_NEWCGROUP         0x02000000
-#endif
-#ifndef CLONE_NEWUTS
-#  define CLONE_NEWUTS            0x04000000
-#endif
-#ifndef CLONE_NEWIPC
-#  define CLONE_NEWIPC            0x08000000
-#endif
-#ifndef CLONE_NEWUSER
-#  define CLONE_NEWUSER           0x10000000
-#endif
-#ifndef CLONE_NEWPID
-#  define CLONE_NEWPID            0x20000000
-#endif
-#ifndef CLONE_NEWNET
-#  define CLONE_NEWNET            0x40000000
-#endif
-
 enum {
 	LXC_NS_USER,
 	LXC_NS_MNT,
@@ -82,39 +25,6 @@ extern const struct ns_info {
 	const char *env_name;
 } ns_info[LXC_NS_MAX];
 
-#if defined(__ia64__)
-int __clone2(int (*__fn) (void *__arg), void *__child_stack_base,
-             size_t __child_stack_size, int __flags, void *__arg, ...);
-#else
-int clone(int (*fn)(void *), void *child_stack,
-	int flags, void *arg, ...
-	/* pid_t *ptid, struct user_desc *tls, pid_t *ctid */ );
-#endif
-
-/**
- * lxc_clone() - create a new process
- *
- * - allocate stack:
- *   This function allocates a new stack the size of page and passes it to the
- *   kernel.
- *
- * - support all CLONE_*flags:
- *   This function supports all CLONE_* flags. If in doubt or not sufficiently
- *   familiar with process creation in the kernel and interactions with libcs
- *   this function should be used.
- *
- * - pthread_atfork() handlers depending on libc:
- *   Whether this function runs pthread_atfork() handlers depends on the
- *   corresponding libc wrapper. glibc currently does not run pthread_atfork()
- *   handlers but does not guarantee that they are not. Other libcs might or
- *   might not run pthread_atfork() handlers. If you require guarantees please
- *   refer to the lxc_raw_clone*() functions in raw_syscalls.{c,h}.
- *
- * - should call lxc_raw_getpid():
- *   The child should use lxc_raw_getpid() to retrieve its pid.
- */
-extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd);
-
 extern int lxc_namespace_2_cloneflag(const char *namespace);
 extern int lxc_namespace_2_ns_idx(const char *namespace);
 extern int lxc_namespace_2_std_identifiers(char *namespaces);
diff --git a/src/lxc/network.c b/src/lxc/network.c
index a825180cf7..eaab9eccfe 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -36,7 +36,7 @@
 #include "memory_utils.h"
 #include "network.h"
 #include "nl.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "syscall_wrappers.h"
 #include "utils.h"
 
diff --git a/src/lxc/raw_syscalls.c b/src/lxc/process_utils.c
similarity index 82%
rename from src/lxc/raw_syscalls.c
rename to src/lxc/process_utils.c
index 3c6bd2506d..89abddec54 100644
--- a/src/lxc/raw_syscalls.c
+++ b/src/lxc/process_utils.c
@@ -13,15 +13,12 @@
 
 #include "compiler.h"
 #include "config.h"
+#include "log.h"
 #include "macro.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "syscall_numbers.h"
 
-int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
-		     char *const envp[], int flags)
-{
-	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
-}
+lxc_log_define(process_utils, lxc);
 
 /*
  * This is based on raw_clone in systemd but adapted to our needs. This uses
@@ -124,3 +121,30 @@ int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
 {
 	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
 }
+
+/*
+ * Let's use the "standard stack limit" (i.e. glibc thread size default) for
+ * stack sizes: 8MB.
+ */
+#define __LXC_STACK_SIZE (8 * 1024 * 1024)
+pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd)
+{
+	pid_t ret;
+	void *stack;
+
+	stack = malloc(__LXC_STACK_SIZE);
+	if (!stack) {
+		SYSERROR("Failed to allocate clone stack");
+		return -ENOMEM;
+	}
+
+#ifdef __ia64__
+	ret = __clone2(fn, stack, __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd);
+#else
+	ret = clone(fn, stack + __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd);
+#endif
+	if (ret < 0)
+		SYSERROR("Failed to clone (%#x)", flags);
+
+	return ret;
+}
diff --git a/src/lxc/process_utils.h b/src/lxc/process_utils.h
new file mode 100644
index 0000000000..6016f792ef
--- /dev/null
+++ b/src/lxc/process_utils.h
@@ -0,0 +1,248 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#ifndef __LXC_PROCESS_UTILS_H
+#define __LXC_PROCESS_UTILS_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#ifndef CSIGNAL
+#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
+#endif
+
+#ifndef CLONE_VM
+#define CLONE_VM 0x00000100 /* set if VM shared between processes */
+#endif
+
+#ifndef CLONE_FS
+#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
+#endif
+
+#ifndef CLONE_FILES
+#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
+#endif
+
+#ifndef CLONE_SIGHAND
+#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#endif
+
+#ifndef CLONE_PIDFD
+#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
+#endif
+
+#ifndef CLONE_PTRACE
+#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
+#endif
+
+#ifndef CLONE_VFORK
+#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
+#endif
+
+#ifndef CLONE_PARENT
+#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
+#endif
+
+#ifndef CLONE_THREAD
+#define CLONE_THREAD 0x00010000 /* Same thread group? */
+#endif
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000 /* New mount namespace group */
+#endif
+
+#ifndef CLONE_SYSVSEM
+#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */
+#endif
+
+#ifndef CLONE_SETTLS
+#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */
+#endif
+
+#ifndef CLONE_PARENT_SETTID
+#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */
+#endif
+
+#ifndef CLONE_CHILD_CLEARTID
+#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */
+#endif
+
+#ifndef CLONE_DETACHED
+#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
+#endif
+
+#ifndef CLONE_UNTRACED
+#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
+#endif
+
+#ifndef CLONE_CHILD_SETTID
+#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
+#endif
+
+#ifndef CLONE_NEWCGROUP
+#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
+#endif
+
+#ifndef CLONE_NEWUTS
+#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
+#endif
+
+#ifndef CLONE_NEWIPC
+#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000 /* New user namespace */
+#endif
+
+#ifndef CLONE_NEWPID
+#define CLONE_NEWPID 0x20000000 /* New pid namespace */
+#endif
+
+#ifndef CLONE_NEWNET
+#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#endif
+
+#ifndef CLONE_IO
+#define CLONE_IO 0x80000000 /* Clone io context */
+#endif
+
+/* Flags for the clone3() syscall. */
+#ifndef CLONE_CLEAR_SIGHAND
+#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
+#endif
+
+#ifndef CLONE_INTO_CGROUP
+#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
+#endif
+
+/*
+ * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
+ * syscalls only:
+ */
+#ifndef CLONE_NEWTIME
+#define CLONE_NEWTIME 0x00000080 /* New time namespace */
+#endif
+
+/* waitid */
+#ifndef P_PIDFD
+#define P_PIDFD 3
+#endif
+
+
+#if defined(__ia64__)
+int __clone2(int (*__fn)(void *__arg), void *__child_stack_base,
+	     size_t __child_stack_size, int __flags, void *__arg, ...);
+#else
+int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ...
+	  /* pid_t *ptid, struct user_desc *tls, pid_t *ctid */);
+#endif
+
+/**
+ * lxc_clone() - create a new process
+ *
+ * - allocate stack:
+ *   This function allocates a new 8MB stack (__LXC_STACK_SIZE) and passes it
+ *   to the kernel.
+ *
+ * - support all CLONE_*flags:
+ *   This function supports all CLONE_* flags. If in doubt or not sufficiently
+ *   familiar with process creation in the kernel and interactions with libcs
+ *   this function should be used.
+ *
+ * - pthread_atfork() handlers depending on libc:
+ *   Whether this function runs pthread_atfork() handlers depends on the
+ *   corresponding libc wrapper. glibc currently does not run pthread_atfork()
+ *   handlers but does not guarantee that it never will. Other libcs might or
+ *   might not run pthread_atfork() handlers. If you require guarantees please
+ *   refer to the lxc_raw_clone*() functions in process_utils.{c,h}.
+ *
+ * - should call lxc_raw_getpid():
+ *   The child should use lxc_raw_getpid() to retrieve its pid.
+ */
+extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd);
+
+
+/*
+ * lxc_raw_clone() - create a new process
+ *
+ * - fork() behavior:
+ *   This function returns 0 in the child and > 0 in the parent.
+ *
+ * - copy-on-write:
+ *   This function does not allocate a new stack and relies on copy-on-write
+ *   semantics.
+ *
+ * - supports subset of CLONE_* flags:
+ *   lxc_raw_clone() intentionally only supports a subset of the flags available
+ *   to the actual system call. Please refer to the implementation what flags
+ *   cannot be used. Also, please don't assume that just because a flag isn't
+ *   explicitly checked for as being unsupported that it is supported. If in
+ *   doubt or not sufficiently familiar with process creation in the kernel and
+ *   interactions with libcs this function should be used.
+ *
+ * - no pthread_atfork() handlers:
+ *   This function circumvents - as much as this is possible - any libc
+ *   wrappers and thus does not run any pthread_atfork() handlers. Make sure
+ *   that this is safe to do in the context you are trying to call this
+ *   function.
+ *
+ * - must call lxc_raw_getpid():
+ *   The child must use lxc_raw_getpid() to retrieve its pid.
+ */
+extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd);
+
+/*
+ * lxc_raw_clone_cb() - create a new process
+ *
+ * - non-fork() behavior:
+ *   Function does return pid of the child or -1 on error. Pass in a callback
+ *   function via the "fn" argument that gets executed in the child process.
+ *   The "args" argument is passed to "fn".
+ *
+ * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb()
+ * as well.
+ */
+extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args,
+			      unsigned long flags, int *pidfd);
+
+#ifndef HAVE_EXECVEAT
+static inline int execveat(int dirfd, const char *pathname, char *const argv[],
+			   char *const envp[], int flags)
+{
+	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+#else
+extern int execveat(int dirfd, const char *pathname, char *const argv[],
+		    char *const envp[], int flags);
+#endif
+
+/*
+ * Because of older glibc's pid cache (up to 2.25) whenever clone() is called
+ * the child must retrieve its own pid via lxc_raw_getpid().
+ */
+static inline pid_t lxc_raw_getpid(void)
+{
+	return (pid_t)syscall(SYS_getpid);
+}
+
+static inline pid_t lxc_raw_gettid(void)
+{
+#if __NR_gettid > 0
+	return syscall(__NR_gettid);
+#else
+	return lxc_raw_getpid();
+#endif
+}
+
+extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
+				     unsigned int flags);
+
+#endif /* __LXC_PROCESS_UTILS_H */
diff --git a/src/lxc/raw_syscalls.h b/src/lxc/raw_syscalls.h
deleted file mode 100644
index 1219f28f43..0000000000
--- a/src/lxc/raw_syscalls.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1+ */
-
-#ifndef __LXC_RAW_SYSCALL_H
-#define __LXC_RAW_SYSCALL_H
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
-#include <sched.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-
-/* clone */
-#ifndef CLONE_PIDFD
-#define CLONE_PIDFD 0x00001000
-#endif
-
-/* waitid */
-#ifndef P_PIDFD
-#define P_PIDFD 3
-#endif
-
-/*
- * lxc_raw_clone() - create a new process
- *
- * - fork() behavior:
- *   This function returns 0 in the child and > 0 in the parent.
- *
- * - copy-on-write:
- *   This function does not allocate a new stack and relies on copy-on-write
- *   semantics.
- *
- * - supports subset of ClONE_* flags:
- *   lxc_raw_clone() intentionally only supports a subset of the flags available
- *   to the actual system call. Please refer to the implementation what flags
- *   cannot be used. Also, please don't assume that just because a flag isn't
- *   explicitly checked for as being unsupported that it is supported. If in
- *   doubt or not sufficiently familiar with process creation in the kernel and
- *   interactions with libcs this function should be used.
- *
- * - no pthread_atfork() handlers:
- *   This function circumvents - as much as this this is possible - any libc
- *   wrappers and thus does not run any pthread_atfork() handlers. Make sure
- *   that this is safe to do in the context you are trying to call this
- *   function.
- *
- * - must call lxc_raw_getpid():
- *   The child must use lxc_raw_getpid() to retrieve its pid.
- */
-extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd);
-
-/*
- * lxc_raw_clone_cb() - create a new process
- *
- * - non-fork() behavior:
- *   Function does return pid of the child or -1 on error. Pass in a callback
- *   function via the "fn" argument that gets executed in the child process.
- *   The "args" argument is passed to "fn".
- *
- * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb()
- * as well.
- */
-extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args,
-			      unsigned long flags, int *pidfd);
-
-extern int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
-			    char *const envp[], int flags);
-
-/*
- * Because of older glibc's pid cache (up to 2.25) whenever clone() is called
- * the child must must retrieve it's own pid via lxc_raw_getpid().
- */
-static inline pid_t lxc_raw_getpid(void)
-{
-	return (pid_t)syscall(SYS_getpid);
-}
-
-static inline pid_t lxc_raw_gettid(void)
-{
-#if __NR_gettid > 0
-	return syscall(__NR_gettid);
-#else
-	return lxc_raw_getpid();
-#endif
-}
-
-extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
-				     unsigned int flags);
-
-#endif /* __LXC_RAW_SYSCALL_H */
diff --git a/src/lxc/rexec.c b/src/lxc/rexec.c
index cd76efb3c4..cf198c0211 100644
--- a/src/lxc/rexec.c
+++ b/src/lxc/rexec.c
@@ -13,7 +13,7 @@
 #include "file_utils.h"
 #include "macro.h"
 #include "memory_utils.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "string_utils.h"
 #include "syscall_wrappers.h"
 
diff --git a/src/lxc/start.c b/src/lxc/start.c
index ba92393ebf..fa84461c30 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -47,7 +47,7 @@
 #include "monitor.h"
 #include "namespace.h"
 #include "network.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "start.h"
 #include "storage/storage.h"
 #include "storage/storage_utils.h"
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index 2cf99945fb..88d0f85ee5 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -35,7 +35,7 @@
 #include "memory_utils.h"
 #include "namespace.h"
 #include "parse.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "syscall_wrappers.h"
 #include "utils.h"
 
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 45ca5270de..cf2c04251b 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -25,7 +25,7 @@
 #include "initutils.h"
 #include "macro.h"
 #include "memory_utils.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "string_utils.h"
 
 /* returns 1 on success, 0 if there were any failures */
diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am
index 493b33c9d8..69b8f30fbc 100644
--- a/src/tests/Makefile.am
+++ b/src/tests/Makefile.am
@@ -30,7 +30,7 @@ lxc_test_parse_config_file_SOURCES = parse_config_file.c \
 lxc_test_raw_clone_SOURCES = lxc_raw_clone.c \
 			     lxctest.h \
 			     ../lxc/namespace.c ../lxc/namespace.h \
-			     ../lxc/raw_syscalls.c ../lxc/raw_syscalls.h
+			     ../lxc/process_utils.c ../lxc/process_utils.h \
 			     ../lxc/utils.c ../lxc/utils.h
 lxc_test_reboot_SOURCES = reboot.c
 lxc_test_saveconfig_SOURCES = saveconfig.c
diff --git a/src/tests/lxc_raw_clone.c b/src/tests/lxc_raw_clone.c
index 655454f395..f72e20cccd 100644
--- a/src/tests/lxc_raw_clone.c
+++ b/src/tests/lxc_raw_clone.c
@@ -39,7 +39,7 @@
 
 #include "lxctest.h"
 #include "namespace.h"
-#include "raw_syscalls.h"
+#include "process_utils.h"
 #include "utils.h"
 
 int main(int argc, char *argv[])

From 96086a6b7b4f62c4f397de146b11456efa5327f7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 15 May 2020 13:42:56 +0200
Subject: [PATCH 4/5] process_utils: add clone3() support

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 configure.ac            |  8 +++++--
 src/lxc/process_utils.h | 47 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index 7f589f9405..4e11254b5b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -622,7 +622,11 @@ AC_CHECK_HEADER([ifaddrs.h],
 AC_HEADER_MAJOR
 
 # Check for some syscalls functions
-AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat])
+AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat clone3])
+# HAVE_STRUCT_CLONE_ARGS={0,1}
+AC_CHECK_TYPES([struct clone_args], [], [], [[#include <linux/sched.h>]])
+AC_CHECK_MEMBERS([struct clone_args.set_tid],[],[],[[#include <linux/sched.h>]])
+AC_CHECK_MEMBERS([struct clone_args.cgroup],[],[],[[#include <linux/sched.h>]])
 
 # Check for strerror_r() support. Defines:
 # - HAVE_STRERROR_R if available
@@ -761,7 +765,7 @@ AX_CHECK_COMPILE_FLAG([-Wstringop-overflow], [CFLAGS="$CFLAGS -Wstringop-overflo
 AX_CHECK_LINK_FLAG([-z relro], [LDFLAGS="$LDFLAGS -z relro"],,[])
 AX_CHECK_LINK_FLAG([-z now], [LDFLAGS="$LDFLAGS -z now"],,[])
 
-CFLAGS="$CFLAGS -Wvla -std=gnu11"
+CFLAGS="$CFLAGS -Wvla -std=gnu11 -fms-extensions"
 if test "x$enable_werror" = "xyes"; then
 	CFLAGS="$CFLAGS -Werror"
 fi
diff --git a/src/lxc/process_utils.h b/src/lxc/process_utils.h
index 6016f792ef..8795247596 100644
--- a/src/lxc/process_utils.h
+++ b/src/lxc/process_utils.h
@@ -6,6 +6,7 @@
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE 1
 #endif
+#include <linux/sched.h>
 #include <sched.h>
 #include <signal.h>
 #include <stdbool.h>
@@ -14,6 +15,9 @@
 #include <sys/syscall.h>
 #include <unistd.h>
 
+#include "config.h"
+#include "syscall_numbers.h"
+
 #ifndef CSIGNAL
 #define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
 #endif
@@ -136,6 +140,49 @@
 #define P_PIDFD 3
 #endif
 
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
+#endif
+
+#ifndef CLONE_ARGS_SIZE_VER1
+#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
+#endif
+
+#ifndef CLONE_ARGS_SIZE_VER2
+#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
+#endif
+
+#ifndef HAVE_STRUCT_CLONE_ARGS
+struct clone_args {
+	__aligned_u64 flags;
+	__aligned_u64 pidfd;
+	__aligned_u64 child_tid;
+	__aligned_u64 parent_tid;
+	__aligned_u64 exit_signal;
+	__aligned_u64 stack;
+	__aligned_u64 stack_size;
+	__aligned_u64 tls;
+	__aligned_u64 set_tid;
+	__aligned_u64 set_tid_size;
+	__aligned_u64 cgroup;
+};
+#endif
+
+struct lxc_clone_args {
+	struct clone_args;
+#ifndef HAVE_STRUCT_CLONE_ARGS_SET_TID
+	__aligned_u64 set_tid;
+	__aligned_u64 set_tid_size;
+#endif
+#ifndef HAVE_STRUCT_CLONE_ARGS_CGROUP
+	__aligned_u64 cgroup;
+#endif
+};
+
+static inline pid_t lxc_clone3(struct lxc_clone_args *args, size_t size)
+{
+	return syscall(__NR_clone3, (struct clone_args *)args, size);
+}
 
 #if defined(__ia64__)
 int __clone2(int (*__fn)(void *__arg), void *__child_stack_base,

From 26d02c9ca2fb95d9f233eb2147f667803d34e439 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 15 May 2020 14:13:07 +0200
Subject: [PATCH 5/5] process_utils: make lxc use clone3() whenever possible

No more weird API quirks between architectures, and we gain access to cool new features.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/process_utils.c | 20 +++++++++++++++++++-
 src/lxc/process_utils.h | 10 +++++++++-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/lxc/process_utils.c b/src/lxc/process_utils.c
index 89abddec54..5be89532a3 100644
--- a/src/lxc/process_utils.c
+++ b/src/lxc/process_utils.c
@@ -28,7 +28,7 @@ lxc_log_define(process_utils, lxc);
  * The nice thing about this is that we get fork() behavior. That is
  * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
  */
-__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd)
+__returns_twice static pid_t __lxc_raw_clone(unsigned long flags, int *pidfd)
 {
 	/*
 	 * These flags don't interest at all so we don't jump through any hoops
@@ -97,6 +97,24 @@ __returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd)
 #endif
 }
 
+__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd)
+{
+	struct lxc_clone_args clone_args = {
+		.flags		= flags,
+		.pidfd		= ptr_to_u64(pidfd),
+		.exit_signal	= SIGCHLD,
+	};
+	pid_t child;
+
+	/* Try clone3() first; only ENOSYS triggers the legacy fallback. */
+	child = lxc_clone3(&clone_args, CLONE_ARGS_SIZE_VER0);
+	if (child >= 0 || errno != ENOSYS)
+		return child;
+
+	SYSTRACE("Falling back to legacy clone");
+	return __lxc_raw_clone(flags, pidfd);
+}
+
 pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags,
 		       int *pidfd)
 {
diff --git a/src/lxc/process_utils.h b/src/lxc/process_utils.h
index 8795247596..48f3c96f95 100644
--- a/src/lxc/process_utils.h
+++ b/src/lxc/process_utils.h
@@ -15,6 +15,7 @@
 #include <sys/syscall.h>
 #include <unistd.h>
 
+#include "compiler.h"
 #include "config.h"
 #include "syscall_numbers.h"
 
@@ -168,6 +169,13 @@ struct clone_args {
 };
 #endif
 
+#ifndef ptr_to_u64
+#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) /* pointer -> __u64 via uintptr_t */
+#endif
+#ifndef u64_to_ptr
+#define u64_to_ptr(x) ((void *)(uintptr_t)(x)) /* (x): argument may be an expression */
+#endif
+
 struct lxc_clone_args {
 	struct clone_args;
 #ifndef HAVE_STRUCT_CLONE_ARGS_SET_TID
@@ -179,7 +187,7 @@ struct lxc_clone_args {
 #endif
 };
 
-static inline pid_t lxc_clone3(struct lxc_clone_args *args, size_t size)
+__returns_twice static inline pid_t lxc_clone3(struct lxc_clone_args *args, size_t size)
 {
 	return syscall(__NR_clone3, (struct clone_args *)args, size);
 }


More information about the lxc-devel mailing list