[lxc-devel] [lxc/master] seccomp: SECCOMP_RET_USER_NOTIF support

brauner on Github lxc-bot at linuxcontainers.org
Wed Apr 24 16:07:44 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190424/ea6b3064/attachment.bin>
-------------- next part --------------
From 444a3958692d59e4268737a52211998182dc8992 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 21 Apr 2019 21:03:51 +0200
Subject: [PATCH] seccomp: SECCOMP_RET_USER_NOTIF support

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/af_unix.c      |  23 +++--
 src/lxc/attach.c       |  33 ++++++-
 src/lxc/commands.c     | 201 +++++++++++++++++++++++++++++------------
 src/lxc/commands.h     |   6 ++
 src/lxc/conf.c         |   5 +
 src/lxc/conf.h         |   5 +
 src/lxc/lxccontainer.c |  26 ++++++
 src/lxc/lxccontainer.h |  20 ++++
 src/lxc/lxcseccomp.h   |  57 ++++++++++++
 src/lxc/macro.h        |   6 ++
 src/lxc/seccomp.c      |  83 +++++++++++++++++
 src/lxc/start.c        |  47 +++++++++-
 12 files changed, 445 insertions(+), 67 deletions(-)

diff --git a/src/lxc/af_unix.c b/src/lxc/af_unix.c
index 3b00d64fb5..06700fac1e 100644
--- a/src/lxc/af_unix.c
+++ b/src/lxc/af_unix.c
@@ -201,7 +201,8 @@ int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds,
 	struct iovec iov;
 	struct cmsghdr *cmsg = NULL;
 	char buf[1] = {0};
-	size_t cmsgbufsize = CMSG_SPACE(num_recvfds * sizeof(int));
+	size_t cmsgbufsize = CMSG_SPACE(sizeof(struct ucred)) +
+			     CMSG_SPACE(num_recvfds * sizeof(int));
 
 	memset(&msg, 0, sizeof(msg));
 	memset(&iov, 0, sizeof(iov));
@@ -224,12 +225,20 @@ int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds,
 	if (ret <= 0)
 		goto out;
 
-	cmsg = CMSG_FIRSTHDR(&msg);
-
-	memset(recvfds, -1, num_recvfds * sizeof(int));
-	if (cmsg && cmsg->cmsg_len == CMSG_LEN(num_recvfds * sizeof(int)) &&
-	    cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
-		memcpy(recvfds, CMSG_DATA(cmsg), num_recvfds * sizeof(int));
+	/* Reset @recvfds first so it is all -1 whenever no fds were passed. If
+	 * SO_PASSCRED is set a ucred message is delivered too, so search for
+	 * the SCM_RIGHTS message instead of assuming it comes first.
+	 */
+	memset(recvfds, -1, num_recvfds * sizeof(int));
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level != SOL_SOCKET ||
+		    cmsg->cmsg_type != SCM_RIGHTS)
+			continue;
+		if (cmsg->cmsg_len == CMSG_LEN(num_recvfds * sizeof(int)))
+			memcpy(recvfds, CMSG_DATA(cmsg),
+			       num_recvfds * sizeof(int));
+		break;
+	}
 
 out:
 	return ret;
diff --git a/src/lxc/attach.c b/src/lxc/attach.c
index f2a1339d3f..6f2f605a2e 100644
--- a/src/lxc/attach.c
+++ b/src/lxc/attach.c
@@ -853,11 +853,24 @@ static int attach_child_main(struct attach_clone_payload *payload)
 
 	if (init_ctx->container && init_ctx->container->lxc_conf &&
 	    init_ctx->container->lxc_conf->seccomp) {
-		ret = lxc_seccomp_load(init_ctx->container->lxc_conf);
+		struct lxc_conf *conf = init_ctx->container->lxc_conf;
+
+		ret = lxc_seccomp_load(conf);
 		if (ret < 0)
 			goto on_error;
 
 		TRACE("Loaded seccomp profile");
+
+#if HAVE_SCMP_FILTER_CTX
+		if (conf->has_seccomp_notify) {
+			ret = lxc_abstract_unix_send_fds(payload->ipc_socket,
+							 &conf->seccomp_notify_fd,
+							 1, NULL, 0);
+			close_prot_errno_disarm(conf->seccomp_notify_fd);
+			if (ret < 0)
+				goto on_error;
+		}
+#endif
 	}
 
 	close(payload->ipc_socket);
@@ -1311,6 +1324,24 @@ int lxc_attach(const char *name, const char *lxcpath,
 			TRACE("Sent LSM label file descriptor %d to child", labelfd);
 		}
 
+#if HAVE_SCMP_FILTER_CTX
+		/* Pass the seccomp notify fd sent by the attached child on. */
+		if (conf->seccomp && conf->has_seccomp_notify) {
+			ret = lxc_abstract_unix_recv_fds(ipc_sockets[0],
+							 &conf->seccomp_notify_fd,
+							 1, NULL, 0);
+			if (ret < 0)
+				goto close_mainloop;
+			TRACE("Retrieved seccomp fd %d", conf->seccomp_notify_fd);
+			ret = lxc_cmd_seccomp_notify_add_listener(name, lxcpath,
+								  conf->seccomp_notify_fd,
+								  -1, 0);
+			close_prot_errno_disarm(conf->seccomp_notify_fd);
+			if (ret < 0)
+				goto close_mainloop;
+		}
+#endif
+
 		/* We're done, the child process should now execute whatever it
 		 * is that the user requested. The parent can now track it with
 		 * waitpid() or similar.
diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index ad2c578e14..71b6b34500 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -29,6 +29,7 @@
 #include <fcntl.h>
 #include <malloc.h>
 #include <poll.h>
+#include <seccomp.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -47,6 +48,7 @@
 #include "log.h"
 #include "lxc.h"
 #include "lxclock.h"
+#include "lxcseccomp.h"
 #include "mainloop.h"
 #include "memory_utils.h"
 #include "monitor.h"
@@ -97,6 +99,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
 		[LXC_CMD_ADD_STATE_CLIENT]    = "add_state_client",
 		[LXC_CMD_CONSOLE_LOG]         = "console_log",
 		[LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients",
+		[LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener",
 	};
 
 	if (cmd >= LXC_CMD_MAX)
@@ -244,14 +247,21 @@ static int lxc_cmd_send(const char *name, struct lxc_cmd_rr *cmd,
 	if (ret < 0 || (size_t)ret != sizeof(cmd->req))
 		return -1;
 
-	if (cmd->req.datalen <= 0)
-		return move_fd(client_fd);
+	if (cmd->req.cmd == LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER) {
+		int notify_fd = PTR_TO_INT(cmd->req.data);
+		ret = lxc_abstract_unix_send_fds(client_fd, &notify_fd, 1, NULL, 0);
+		if (ret <= 0)
+			return -1;
+	} else {
+		if (cmd->req.datalen <= 0)
+			return move_fd(client_fd);
 
-	errno = EMSGSIZE;
-	ret = lxc_send_nointr(client_fd, (void *)cmd->req.data,
-			      cmd->req.datalen, MSG_NOSIGNAL);
-	if (ret < 0 || ret != (ssize_t)cmd->req.datalen)
-		return -1;
+		errno = EMSGSIZE;
+		ret = lxc_send_nointr(client_fd, (void *)cmd->req.data,
+				      cmd->req.datalen, MSG_NOSIGNAL);
+		if (ret < 0 || ret != (ssize_t)cmd->req.datalen)
+			return -1;
+	}
 
 	return move_fd(client_fd);
 }
@@ -373,7 +383,8 @@ pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath)
 }
 
 static int lxc_cmd_get_init_pid_callback(int fd, struct lxc_cmd_req *req,
-					 struct lxc_handler *handler)
+					 struct lxc_handler *handler,
+					 struct lxc_epoll_descr *descr)
 {
 	intmax_t pid = handler->pid;
 
@@ -407,7 +418,8 @@ int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath)
 }
 
 static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
-					    struct lxc_handler *handler)
+					    struct lxc_handler *handler,
+					    struct lxc_epoll_descr *descr)
 {
 	struct lxc_cmd_rsp rsp = { .data = INT_TO_PTR(handler->ns_clone_flags) };
 
@@ -457,7 +469,8 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
 }
 
 static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
-				       struct lxc_handler *handler)
+				       struct lxc_handler *handler,
+				       struct lxc_epoll_descr *descr)
 {
 	const char *path;
 	struct lxc_cmd_rsp rsp;
@@ -509,7 +522,8 @@ char *lxc_cmd_get_config_item(const char *name, const char *item,
 }
 
 static int lxc_cmd_get_config_item_callback(int fd, struct lxc_cmd_req *req,
-					    struct lxc_handler *handler)
+					    struct lxc_handler *handler,
+					    struct lxc_epoll_descr *descr)
 {
 	__do_free char *cidata = NULL;
 	int cilen;
@@ -575,7 +589,8 @@ int lxc_cmd_get_state(const char *name, const char *lxcpath)
 }
 
 static int lxc_cmd_get_state_callback(int fd, struct lxc_cmd_req *req,
-				      struct lxc_handler *handler)
+				      struct lxc_handler *handler,
+				      struct lxc_epoll_descr *descr)
 {
 	struct lxc_cmd_rsp rsp = { .data = INT_TO_PTR(handler->state) };
 
@@ -622,7 +637,8 @@ int lxc_cmd_stop(const char *name, const char *lxcpath)
 }
 
 static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
-				 struct lxc_handler *handler)
+				 struct lxc_handler *handler,
+				 struct lxc_epoll_descr *descr)
 {
 	struct lxc_cmd_rsp rsp;
 	int stopsignal = SIGKILL;
@@ -665,7 +681,8 @@ int lxc_cmd_terminal_winch(const char *name, const char *lxcpath)
 }
 
 static int lxc_cmd_terminal_winch_callback(int fd, struct lxc_cmd_req *req,
-					   struct lxc_handler *handler)
+					   struct lxc_handler *handler,
+					   struct lxc_epoll_descr *descr)
 {
 	/* should never be called */
 	return -1;
@@ -720,7 +737,8 @@ int lxc_cmd_console(const char *name, int *ttynum, int *fd, const char *lxcpath)
 }
 
 static int lxc_cmd_console_callback(int fd, struct lxc_cmd_req *req,
-				    struct lxc_handler *handler)
+				    struct lxc_handler *handler,
+				    struct lxc_epoll_descr *descr)
 {
 	int masterfd, ret;
 	struct lxc_cmd_rsp rsp;
@@ -773,7 +791,8 @@ char *lxc_cmd_get_name(const char *hashed_sock_name)
 }
 
 static int lxc_cmd_get_name_callback(int fd, struct lxc_cmd_req *req,
-				     struct lxc_handler *handler)
+				     struct lxc_handler *handler,
+				     struct lxc_epoll_descr *descr)
 {
 	struct lxc_cmd_rsp rsp;
 
@@ -811,7 +830,8 @@ char *lxc_cmd_get_lxcpath(const char *hashed_sock_name)
 }
 
 static int lxc_cmd_get_lxcpath_callback(int fd, struct lxc_cmd_req *req,
-					struct lxc_handler *handler)
+					struct lxc_handler *handler,
+					struct lxc_epoll_descr *descr)
 {
 	struct lxc_cmd_rsp rsp;
 
@@ -872,7 +892,8 @@ int lxc_cmd_add_state_client(const char *name, const char *lxcpath,
 }
 
 static int lxc_cmd_add_state_client_callback(int fd, struct lxc_cmd_req *req,
-					     struct lxc_handler *handler)
+					     struct lxc_handler *handler,
+					     struct lxc_epoll_descr *descr)
 {
 	int ret;
 	struct lxc_cmd_rsp rsp = {0};
@@ -943,7 +964,8 @@ int lxc_cmd_console_log(const char *name, const char *lxcpath,
 }
 
 static int lxc_cmd_console_log_callback(int fd, struct lxc_cmd_req *req,
-					struct lxc_handler *handler)
+					struct lxc_handler *handler,
+					struct lxc_epoll_descr *descr)
 {
 	struct lxc_cmd_rsp rsp;
 	uint64_t buffer_size = handler->conf->console.buffer_size;
@@ -1002,7 +1024,8 @@ int lxc_cmd_serve_state_clients(const char *name, const char *lxcpath,
 }
 
 static int lxc_cmd_serve_state_clients_callback(int fd, struct lxc_cmd_req *req,
-						struct lxc_handler *handler)
+						struct lxc_handler *handler,
+						struct lxc_epoll_descr *descr)
 {
 	int ret;
 	lxc_state_t state = PTR_TO_INT(req->data);
@@ -1025,62 +1048,126 @@ static int lxc_cmd_serve_state_clients_callback(int fd, struct lxc_cmd_req *req,
 	return 1;
 }
 
+/* Send @fd to container @name via LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER. */
+int lxc_cmd_seccomp_notify_add_listener(const char *name, const char *lxcpath,
+					int fd,
+					/* unused */ unsigned int command,
+					/* unused */ unsigned int flags)
+{
+#if HAVE_SCMP_FILTER_CTX
+	int ret, stopped;
+	struct lxc_cmd_rr cmd = {
+		.req = {
+			.cmd = LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER,
+			.data = INT_TO_PTR(fd), /* fd is sent by lxc_cmd_send() */
+		},
+	};
+
+	ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
+	if (ret < 0) {
+		SYSERROR("Failed to execute command");
+		return -1;
+	}
+
+	return cmd.rsp.ret; /* status returned in the command response */
+#else
+	return minus_one_set_errno(ENOSYS); /* built without seccomp filter ctx */
+#endif
+}
+
+/* Receive a seccomp notify fd from the client connected on @fd and add it to
+ * the mainloop @descr. Returns 0 on success and 1 to have @fd cleaned up.
+ */
+static int lxc_cmd_seccomp_notify_add_listener_callback(int fd,
+							struct lxc_cmd_req *req,
+							struct lxc_handler *handler,
+							struct lxc_epoll_descr *descr)
+{
+	__do_close_prot_errno int recv_fd = -EBADF;
+	struct lxc_cmd_rsp rsp = {0};
+	int ret;
+
+	ret = lxc_abstract_unix_recv_fds(fd, &recv_fd, 1, NULL, 0);
+	if (ret <= 0)
+		goto reap_client_fd;
+
+	/* Register the received fd; it stays auto-closed until that worked. */
+	ret = lxc_mainloop_add_handler(descr, recv_fd,
+				       seccomp_notify_handler, handler);
+	if (ret < 0)
+		goto reap_client_fd;
+	move_fd(recv_fd);
+
+	ret = lxc_cmd_rsp_send(fd, &rsp);
+	if (ret < 0)
+		goto reap_client_fd;
+
+	return 0;
+
+reap_client_fd: /* non-zero: lxc_cmd_handler() closes fd and cleans up */
+	return 1;
+}
+
 static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
-			   struct lxc_handler *handler)
+			   struct lxc_handler *handler,
+			   struct lxc_epoll_descr *descr)
 {
-	typedef int (*callback)(int, struct lxc_cmd_req *, struct lxc_handler *);
+	typedef int (*callback)(int, struct lxc_cmd_req *, struct lxc_handler *,
+				struct lxc_epoll_descr *);
 
 	callback cb[LXC_CMD_MAX] = {
-		[LXC_CMD_CONSOLE]             = lxc_cmd_console_callback,
-		[LXC_CMD_TERMINAL_WINCH]      = lxc_cmd_terminal_winch_callback,
-		[LXC_CMD_STOP]                = lxc_cmd_stop_callback,
-		[LXC_CMD_GET_STATE]           = lxc_cmd_get_state_callback,
-		[LXC_CMD_GET_INIT_PID]        = lxc_cmd_get_init_pid_callback,
-		[LXC_CMD_GET_CLONE_FLAGS]     = lxc_cmd_get_clone_flags_callback,
-		[LXC_CMD_GET_CGROUP]          = lxc_cmd_get_cgroup_callback,
-		[LXC_CMD_GET_CONFIG_ITEM]     = lxc_cmd_get_config_item_callback,
-		[LXC_CMD_GET_NAME]            = lxc_cmd_get_name_callback,
-		[LXC_CMD_GET_LXCPATH]         = lxc_cmd_get_lxcpath_callback,
-		[LXC_CMD_ADD_STATE_CLIENT]    = lxc_cmd_add_state_client_callback,
-		[LXC_CMD_CONSOLE_LOG]         = lxc_cmd_console_log_callback,
-		[LXC_CMD_SERVE_STATE_CLIENTS] = lxc_cmd_serve_state_clients_callback,
+		[LXC_CMD_CONSOLE]                     = lxc_cmd_console_callback,
+		[LXC_CMD_TERMINAL_WINCH]              = lxc_cmd_terminal_winch_callback,
+		[LXC_CMD_STOP]                        = lxc_cmd_stop_callback,
+		[LXC_CMD_GET_STATE]                   = lxc_cmd_get_state_callback,
+		[LXC_CMD_GET_INIT_PID]                = lxc_cmd_get_init_pid_callback,
+		[LXC_CMD_GET_CLONE_FLAGS]             = lxc_cmd_get_clone_flags_callback,
+		[LXC_CMD_GET_CGROUP]                  = lxc_cmd_get_cgroup_callback,
+		[LXC_CMD_GET_CONFIG_ITEM]             = lxc_cmd_get_config_item_callback,
+		[LXC_CMD_GET_NAME]                    = lxc_cmd_get_name_callback,
+		[LXC_CMD_GET_LXCPATH]                 = lxc_cmd_get_lxcpath_callback,
+		[LXC_CMD_ADD_STATE_CLIENT]            = lxc_cmd_add_state_client_callback,
+		[LXC_CMD_CONSOLE_LOG]                 = lxc_cmd_console_log_callback,
+		[LXC_CMD_SERVE_STATE_CLIENTS]         = lxc_cmd_serve_state_clients_callback,
+		[LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = lxc_cmd_seccomp_notify_add_listener_callback,
 	};
 
 	if (req->cmd >= LXC_CMD_MAX) {
 		ERROR("Undefined command id %d", req->cmd);
 		return -1;
 	}
-	return cb[req->cmd](fd, req, handler);
+	return cb[req->cmd](fd, req, handler, descr);
 }
 
 static void lxc_cmd_fd_cleanup(int fd, struct lxc_handler *handler,
-			       struct lxc_epoll_descr *descr,
-			       const lxc_cmd_t cmd)
+			       struct lxc_epoll_descr *descr, const lxc_cmd_t cmd)
 {
 	struct lxc_list *cur, *next;
 
 	lxc_terminal_free(handler->conf, fd);
 	lxc_mainloop_del_handler(descr, fd);
-	if (cmd != LXC_CMD_ADD_STATE_CLIENT) {
-		close(fd);
-		return;
-	}
-
-	lxc_list_for_each_safe(cur, &handler->conf->state_clients, next) {
-		struct lxc_state_client *client = cur->elem;
-
-		if (client->clientfd != fd)
-			continue;
 
-		/* kick client from list */
-		lxc_list_del(cur);
-		close(client->clientfd);
-		free(cur->elem);
-		free(cur);
-		/* No need to walk the whole list. If we found the state client
-		 * fd there can't be a second one.
-		 */
+	switch (cmd) {
+	case LXC_CMD_ADD_STATE_CLIENT:
+		lxc_list_for_each_safe(cur, &handler->conf->state_clients, next) {
+			struct lxc_state_client *client = cur->elem;
+
+			if (client->clientfd != fd)
+				continue;
+
+			/* kick client from list */
+			lxc_list_del(cur);
+			close(client->clientfd);
+			free(cur->elem);
+			free(cur);
+			/* No need to walk the whole list. If we found the state
+			 * client fd there can't be a second one.
+			 */
+			break;
+		}
 		break;
+	default:
+		close(fd);
 	}
 }
 
@@ -1139,7 +1226,7 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data,
 		req.data = reqdata;
 	}
 
-	ret = lxc_cmd_process(fd, &req, handler);
+	ret = lxc_cmd_process(fd, &req, handler, descr);
 	if (ret) {
 		/* This is not an error, but only a request to close fd. */
 		ret = LXC_MAINLOOP_CONTINUE;
diff --git a/src/lxc/commands.h b/src/lxc/commands.h
index 2c024b65d1..d7d0c6096a 100644
--- a/src/lxc/commands.h
+++ b/src/lxc/commands.h
@@ -46,6 +46,7 @@ typedef enum {
 	LXC_CMD_ADD_STATE_CLIENT,
 	LXC_CMD_CONSOLE_LOG,
 	LXC_CMD_SERVE_STATE_CLIENTS,
+	LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER,
 	LXC_CMD_MAX,
 } lxc_cmd_t;
 
@@ -124,5 +125,10 @@ extern int lxc_cmd_mainloop_add(const char *name, struct lxc_epoll_descr *descr,
 extern int lxc_try_cmd(const char *name, const char *lxcpath);
 extern int lxc_cmd_console_log(const char *name, const char *lxcpath,
 			       struct lxc_console_log *log);
+extern int lxc_cmd_seccomp_notify_add_listener(const char *name,
+					       const char *lxcpath,
+					       int fd,
+					       /* unused */ unsigned int command,
+					       /* unused */ unsigned int flags);
 
 #endif /* __commands_h */
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 986cb89b02..f1e9c4cc65 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -2752,6 +2752,11 @@ struct lxc_conf *lxc_conf_init(void)
 	new->lsm_aa_profile = NULL;
 	lxc_list_init(&new->lsm_aa_raw);
 	new->lsm_se_context = NULL;
+	new->has_seccomp_notify = false;
+	new->seccomp_notify_fd = -EBADF;
+	new->seccomp_notify_proxy_fd = -EBADF;
+	new->seccomp_notify_req = NULL;
+	new->seccomp_notify_resp = NULL;
 	new->tmp_umount_proc = false;
 	new->tmp_umount_proc = 0;
 	new->shmount.path_host = NULL;
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 85daf1b6a5..f44c1b29e4 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -299,6 +299,11 @@ struct lxc_conf {
 	unsigned int seccomp_allow_nesting;
 #if HAVE_SCMP_FILTER_CTX
 	scmp_filter_ctx seccomp_ctx;
+	bool has_seccomp_notify;
+	int seccomp_notify_fd;
+	int seccomp_notify_proxy_fd;
+	struct seccomp_notif *seccomp_notify_req;
+	struct seccomp_notif_resp *seccomp_notify_resp;
 #endif
 	int maincmd_fd;
 	unsigned int autodev;  /* if 1, mount and fill a /dev at start */
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 72d87410f8..6d7d7edeb8 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -5725,3 +5725,29 @@ bool lxc_has_api_extension(const char *extension)
 
 	return false;
 }
+
+/* Get/set the seccomp notify fd or set the notify proxy fd of container @c. */
+int seccomp_notify(struct lxc_container *c, unsigned int cmd, int fd)
+{
+	if (!c || !c->lxc_conf)
+		return minus_one_set_errno(EINVAL); /* errno must be positive */
+
+	switch (cmd) {
+	case LXC_SECCOMP_NOTIFY_GET_FD:
+		if (fd)
+			return minus_one_set_errno(EINVAL);
+		return c->lxc_conf->seccomp_notify_fd;
+	case LXC_SECCOMP_NOTIFY_SET_FD:
+		if (fd < 0)
+			return minus_one_set_errno(EINVAL);
+		c->lxc_conf->seccomp_notify_fd = fd;
+		return 0;
+	case LXC_SECCOMP_NOTIFY_SET_PROXY:
+		if (fd < 0)
+			return minus_one_set_errno(EINVAL);
+		c->lxc_conf->seccomp_notify_proxy_fd = fd;
+		return 0;
+	}
+
+	return minus_one_set_errno(EINVAL);
+}
diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h
index c46be521ea..fe5ebcbed5 100644
--- a/src/lxc/lxccontainer.h
+++ b/src/lxc/lxccontainer.h
@@ -24,6 +24,7 @@
 #define __LXC_CONTAINER_H
 
 #include <malloc.h>
+#include <seccomp.h>
 #include <semaphore.h>
 #include <stdbool.h>
 #include <stdint.h>
@@ -60,6 +61,23 @@ struct lxc_mount {
 	int version;
 };
 
+enum { /* cmd values accepted by seccomp_notify() */
+	LXC_SECCOMP_NOTIFY_SET_FD = 0,    /* store fd as the seccomp notify fd */
+	LXC_SECCOMP_NOTIFY_GET_FD = 1,    /* return the stored seccomp notify fd */
+	LXC_SECCOMP_NOTIFY_SET_PROXY = 2, /* store fd as the notify proxy fd */
+	LXC_SECCOMP_NOTIFY_MAX,
+};
+
+#if HAVE_SCMP_FILTER_CTX
+struct seccomp_notify_proxy_msg { /* exchanged with the notify proxy fd */
+	uint32_t version; /* NOTE(review): no visible writer sets this - confirm */
+	struct seccomp_notif req;       /* notification forwarded to the proxy */
+	struct seccomp_notif_resp resp; /* reply for seccomp_notif_send_resp() */
+	pid_t monitor_pid;              /* handler->monitor_pid */
+	pid_t init_pid;                 /* handler->pid */
+};
+#endif
+
 /*!
  * An LXC container.
  *
@@ -867,6 +885,8 @@ struct lxc_container {
 	 */
 	int (*umount)(struct lxc_container *c, const char *target,
 		      unsigned long mountflags, struct lxc_mount *mnt);
+
+	int (*seccomp_notify)(struct lxc_container *c, unsigned int cmd, int fd);
 };
 
 /*!
diff --git a/src/lxc/lxcseccomp.h b/src/lxc/lxcseccomp.h
index 93d57bbdcd..612a22fc7f 100644
--- a/src/lxc/lxcseccomp.h
+++ b/src/lxc/lxcseccomp.h
@@ -24,12 +24,64 @@
 #ifndef __LXC_LXCSECCOMP_H
 #define __LXC_LXCSECCOMP_H
 
+#include <errno.h>
+#include <seccomp.h>
+
 #include "conf.h"
 
 #ifdef HAVE_SECCOMP
 extern int lxc_seccomp_load(struct lxc_conf *conf);
 extern int lxc_read_seccomp_config(struct lxc_conf *conf);
 extern void lxc_seccomp_free(struct lxc_conf *conf);
+
+#ifndef SCMP_ACT_USER_NOTIF
+#define SCMP_ACT_USER_NOTIF 0x7fc00000U
+#endif
+
+/*
+ * User notification bits. It's a little unfortunate that we don't export
+ * system.h, so we end up having to define all these structures again.
+ * SECCOMP_RET_USER_NOTIF was added in kernel 5.0.
+ */
+#ifndef SECCOMP_RET_USER_NOTIF
+#define SECCOMP_RET_USER_NOTIF	 0x7fc00000U
+
+struct seccomp_notif_sizes {
+	__u16 seccomp_notif;
+	__u16 seccomp_notif_resp;
+	__u16 seccomp_data;
+};
+
+struct seccomp_notif {
+	__u64 id;
+	__u32 pid;
+	__u32 flags;
+	struct seccomp_data data;
+};
+
+struct seccomp_notif_resp {
+	__u64 id;
+	__s64 val;
+	__s32 error;
+	__u32 flags;
+};
+
+#define SECCOMP_IOC_MAGIC               '!'
+#define SECCOMP_IO(nr)                  _IO(SECCOMP_IOC_MAGIC, nr)
+#define SECCOMP_IOR(nr, type)           _IOR(SECCOMP_IOC_MAGIC, nr, type)
+#define SECCOMP_IOW(nr, type)           _IOW(SECCOMP_IOC_MAGIC, nr, type)
+#define SECCOMP_IOWR(nr, type)          _IOWR(SECCOMP_IOC_MAGIC, nr, type)
+
+/* Flags for seccomp notification fd ioctl. */
+#define SECCOMP_IOCTL_NOTIF_RECV        SECCOMP_IOWR(0, struct seccomp_notif)
+#define SECCOMP_IOCTL_NOTIF_SEND        SECCOMP_IOWR(1, \
+                                                struct seccomp_notif_resp)
+#define SECCOMP_IOCTL_NOTIF_ID_VALID    SECCOMP_IOR(2, __u64)
+extern int seccomp_notif_get_fd(const scmp_filter_ctx ctx);
+#endif
+extern int seccomp_notify_handler(int fd, uint32_t events, void *data,
+				  struct lxc_epoll_descr *descr);
+
 #else
 static inline int lxc_seccomp_load(struct lxc_conf *conf)
 {
@@ -46,6 +98,11 @@ static inline void lxc_seccomp_free(struct lxc_conf *conf)
 	free(conf->seccomp);
 	conf->seccomp = NULL;
 }
+static inline int seccomp_notify_handler(int fd, uint32_t events, void *data,
+				  struct lxc_epoll_descr *descr)
+{
+	return -ENOSYS;
+}
 #endif
 
 #endif
diff --git a/src/lxc/macro.h b/src/lxc/macro.h
index b83e439115..7df3b56f03 100644
--- a/src/lxc/macro.h
+++ b/src/lxc/macro.h
@@ -407,4 +407,10 @@ enum {
 		__internal_fd__;            \
 	})
 
+#define minus_one_set_errno(__errno__) /* sets errno, evaluates to -1 */ \
+	({                             \
+		errno = __errno__;     \
+		-1;                    \
+	})
+
 #endif /* __LXC_MACRO_H */
diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c
index f90602e1f9..86de4203ef 100644
--- a/src/lxc/seccomp.c
+++ b/src/lxc/seccomp.c
@@ -33,7 +33,9 @@
 
 #include "config.h"
 #include "log.h"
+#include "lxccontainer.h"
 #include "lxcseccomp.h"
+#include "memory_utils.h"
 #include "utils.h"
 
 #ifdef __MIPSEL__
@@ -87,6 +89,8 @@ static const char *get_action_name(uint32_t action)
 		return "trap";
 	case SCMP_ACT_ERRNO(0):
 		return "errno";
+	case SCMP_ACT_USER_NOTIF:
+		return "notify";
 	}
 
 	return "invalid action";
@@ -116,6 +120,8 @@ static uint32_t get_v2_default_action(char *line)
 		ret_action = SCMP_ACT_ALLOW;
 	} else if (strncmp(line, "trap", 4) == 0) {
 		ret_action = SCMP_ACT_TRAP;
+	} else if (strncmp(line, "notify", 6) == 0) {
+		ret_action = SCMP_ACT_USER_NOTIF;
 	} else if (line[0]) {
 		ERROR("Unrecognized seccomp action \"%s\"", line);
 		return -2;
@@ -928,6 +934,17 @@ static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_c
 			goto bad_rule;
 		}
 
+		if ((rule.action == SCMP_ACT_USER_NOTIF) &&
+		    !conf->has_seccomp_notify) {
+			ret = seccomp_attr_set(conf->seccomp_ctx,
+					       SCMP_FLTATR_NEW_LISTENER, 1);
+			if (ret)
+				goto bad_rule;
+
+			conf->has_seccomp_notify = true;
+			TRACE("Set SCMP_FLTATR_NEW_LISTENER attribute");
+		}
+
 		if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
 					 conf->seccomp_ctx, &rule))
 			goto bad_rule;
@@ -1228,6 +1245,17 @@ int lxc_seccomp_load(struct lxc_conf *conf)
 			SYSWARN("Failed to export seccomp filter to log file");
 		}
 	}
+
+	if (conf->has_seccomp_notify) {
+		ret = seccomp_notif_get_fd(conf->seccomp_ctx);
+		if (ret < 0) {
+			errno = -ret;
+			return -1;
+		}
+
+		conf->seccomp_notify_fd = ret;
+		TRACE("Retrieved new seccomp listener fd %d", ret);
+	}
 #endif
 
 	return 0;
@@ -1243,5 +1271,60 @@ void lxc_seccomp_free(struct lxc_conf *conf)
 		seccomp_release(conf->seccomp_ctx);
 		conf->seccomp_ctx = NULL;
 	}
+	close_prot_errno_disarm(conf->seccomp_notify_fd);
+	close_prot_errno_disarm(conf->seccomp_notify_proxy_fd);
+	seccomp_notif_free(conf->seccomp_notify_req, conf->seccomp_notify_resp);
+	conf->seccomp_notify_req = NULL;
+	conf->seccomp_notify_resp = NULL;
+#endif
+}
+
+/* Mainloop callback: read one seccomp notification from @fd, ask the proxy for
+ * a verdict (or deny with EPERM if none is set) and send the response back.
+ */
+int seccomp_notify_handler(int fd, uint32_t events, void *data,
+			   struct lxc_epoll_descr *descr)
+{
+#if HAVE_SCMP_FILTER_CTX
+	int ret;
+	struct lxc_handler *hdlr = data;
+	struct lxc_conf *conf = hdlr->conf;
+	struct seccomp_notif *req = conf->seccomp_notify_req;
+	struct seccomp_notif_resp *resp = conf->seccomp_notify_resp;
+	int listener_proxy_fd = conf->seccomp_notify_proxy_fd;
+	struct seccomp_notify_proxy_msg msg = {0}; /* never send stack garbage */
+
+	ret = seccomp_notif_receive(fd, req);
+	if (ret)
+		return minus_one_set_errno(-ret);
+
+	if (req->data.nr != SCMP_SYS(mknod))
+		return minus_one_set_errno(EINVAL);
+
+	if (listener_proxy_fd >= 0) {
+		memcpy(&msg.req, req, sizeof(msg.req));
+		msg.monitor_pid = hdlr->monitor_pid;
+		msg.init_pid = hdlr->pid;
+		ret = lxc_send_nointr(listener_proxy_fd, &msg, sizeof(msg),
+				      MSG_NOSIGNAL);
+		if (ret < 0 || ret != (ssize_t)sizeof(msg))
+			return -1;
+		ret = lxc_recv_nointr(listener_proxy_fd, &msg, sizeof(msg), 0);
+		if (ret != (ssize_t)sizeof(msg))
+			return -1;
+	} else {
+		resp->id = req->id;
+		resp->val = 0;
+		resp->error = -EPERM; /* the kernel expects a negative errno */
+		memcpy(&msg.resp, resp, sizeof(msg.resp)); /* struct, not pointer */
+	}
+
+	ret = seccomp_notif_send_resp(fd, &msg.resp);
+	if (ret)
+		return minus_one_set_errno(-ret);
+
+	return 0;
+#else
+	return -ENOSYS;
 #endif
 }
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 46a92d3d46..01af9e546d 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -591,6 +591,19 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
 		goto out_mainloop_console;
 	}
 
+#if HAVE_SCMP_FILTER_CTX
+	if (handler->conf->has_seccomp_notify) {
+		ret = lxc_mainloop_add_handler(&descr,
+					       handler->conf->seccomp_notify_fd,
+					       seccomp_notify_handler, handler);
+		if (ret < 0) {
+			ERROR("Failed to add seccomp notify handler for %d to mainloop",
+			      handler->conf->seccomp_notify_fd);
+			goto out_mainloop_console;
+		}
+	}
+#endif
+
 	if (has_console) {
 		struct lxc_terminal *console = &handler->conf->console;
 
@@ -1278,8 +1291,6 @@ static int do_start(void *data)
 
 	/* Setup the container, ip, names, utsname, ... */
 	ret = lxc_setup(handler);
-	close(handler->data_sock[1]);
-	close(handler->data_sock[0]);
 	if (ret < 0) {
 		ERROR("Failed to setup container \"%s\"", handler->name);
 		goto out_warn_father;
@@ -1330,6 +1341,20 @@ static int do_start(void *data)
 	if (ret < 0)
 		goto out_warn_father;
 
+	if (handler->conf->has_seccomp_notify) {
+		ret = lxc_abstract_unix_send_fds(handler->data_sock[0],
+						 &handler->conf->seccomp_notify_fd,
+						 1, NULL, 0);
+		if (ret < 0) {
+			SYSERROR("Failed to send seccomp notify fd to parent");
+			goto out_warn_father;
+		}
+		close(handler->conf->seccomp_notify_fd);
+		handler->conf->seccomp_notify_fd = -EBADF;
+	}
+	close(handler->data_sock[1]);
+	close(handler->data_sock[0]);
+
 	ret = run_lxc_hooks(handler->name, "start", handler->conf, NULL);
 	if (ret < 0) {
 		ERROR("Failed to run lxc.hook.start for container \"%s\"",
@@ -1888,6 +1913,24 @@ static int lxc_spawn(struct lxc_handler *handler)
 		goto out_delete_net;
 	}
 
+	if (handler->conf->has_seccomp_notify) {
+		ret = lxc_abstract_unix_recv_fds(handler->data_sock[1],
+						 &handler->conf->seccomp_notify_fd,
+						 1, NULL, 0);
+		if (ret < 0) {
+			SYSERROR("Failed to receive seccomp notify fd from child");
+			goto out_delete_net;
+		}
+
+		ret = seccomp_notif_alloc(&handler->conf->seccomp_notify_req,
+					  &handler->conf->seccomp_notify_resp);
+		if (ret) {
+			errno = -ret; /* libseccomp returns a negative errno */
+			ret = -1;
+			goto out_delete_net;
+		}
+	}
+
 	ret = handler->ops->post_start(handler, handler->data);
 	if (ret < 0)
 		goto out_abort;


More information about the lxc-devel mailing list