[lxc-devel] [lxc/master] 2017 08 05/oci fix share
hallyn on Github
lxc-bot at linuxcontainers.org
Mon Aug 7 02:00:30 UTC 2017
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 400 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20170807/198f970f/attachment.bin>
-------------- next part --------------
From 82428d1e74d9c0ce1df022d232af9d19c66767b9 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge at hallyn.com>
Date: Sat, 5 Aug 2017 11:24:25 -0500
Subject: [PATCH 1/2] [WIP] Add OCI template
Signed-off-by: Serge Hallyn <serge at hallyn.com>
---
templates/lxc-oci.in | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 159 insertions(+)
create mode 100755 templates/lxc-oci.in
diff --git a/templates/lxc-oci.in b/templates/lxc-oci.in
new file mode 100755
index 000000000..598ed403c
--- /dev/null
+++ b/templates/lxc-oci.in
@@ -0,0 +1,159 @@
+#!/bin/sh
+
+# Create application containers from OCI images
+
+# Copyright © 2014 Stéphane Graber <stgraber at ubuntu.com>
+# Copyright © 2017 Serge Hallyn <serge at hallyn.com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
+# USA
+
+set -eu
+
+# Make sure the usual locations are in PATH
+export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin
+
+# Bail out early unless every external tool we depend on is installed
+for tool in skopeo oci-image-tool; do
+    type $tool >/dev/null 2>&1 && continue
+    echo "ERROR: Missing required tool: $tool" 1>&2
+    exit 1
+done
+
+# Some useful functions
+# Remove the temporary download directory, if one was created.
+# This runs from the EXIT/HUP/INT/TERM trap, which is installed before
+# DOWNLOAD_TEMP is assigned, so the variable may still be unset here; the
+# ":-" expansion keeps "set -u" from aborting inside the handler.
+cleanup() {
+    if [ -n "${DOWNLOAD_TEMP:-}" ] && [ -d "${DOWNLOAD_TEMP}" ]; then
+        rm -Rf "${DOWNLOAD_TEMP}"
+    fi
+}
+
+# Classify the current user namespace from /proc/self/uid_map and print the
+# verdict on stdout:
+#   no          - there is no uid_map, or it contains the full identity
+#                 mapping "0 0 4294967295"
+#   userns-root - an entry's second and third fields are "0" and "1", or our
+#                 map is identical to the one of PID 1
+#   yes         - anything else
+in_userns() {
+    if [ ! -e /proc/self/uid_map ]; then
+        echo no
+        return
+    fi
+
+    while read entry; do
+        triple=$(echo $entry | awk '{ print $1 " " $2 " " $3 }')
+        if [ "$triple" = "0 0 4294967295" ]; then
+            echo no
+            return
+        fi
+        if echo $triple | grep -q " 0 1$"; then
+            echo userns-root
+            return
+        fi
+    done < /proc/self/uid_map
+
+    if [ "$(cat /proc/self/uid_map)" = "$(cat /proc/1/uid_map)" ]; then
+        echo userns-root
+        return
+    fi
+    echo yes
+}
+
+# Print the help text for this template on stdout. Always returns 0.
+usage() {
+ cat <<EOF
+LXC container template for OCI images
+
+Special arguments:
+[ -h | --help ]: Print this help message and exit.
+
+Required arguments:
+[ -u | --url <url> ]: The OCI image URL
+
+LXC internal arguments (do not pass manually!):
+[ --name <name> ]: The container name
+[ --path <path> ]: The path to the container
+[ --rootfs <rootfs> ]: The path to the container's rootfs
+[ --mapped-uid <map> ]: A uid map (user namespaces)
+[ --mapped-gid <map> ]: A gid map (user namespaces)
+
+EOF
+ return 0
+}
+
+# Parse the command line.
+# - "url:" is listed among the long options so that the documented --url
+#   spelling is accepted, not only -u.
+# - getopt runs inside the "if" condition: under "set -e" a bare assignment
+#   whose command substitution fails would terminate the script before the
+#   usage text could be printed.
+if ! options=$(getopt -o u:h -l help,url:,name:,path:,\
+rootfs:,mapped-uid:,mapped-gid: -- "$@"); then
+    usage
+    exit 1
+fi
+eval set -- "$options"
+
+# Give every option variable a default so that "set -u" does not abort on the
+# later checks when an option was not passed.
+OCI_URL=""
+LXC_NAME=""
+LXC_PATH=""
+LXC_ROOTFS=""
+LXC_MAPPED_UID=""
+LXC_MAPPED_GID=""
+
+while :; do
+    case "$1" in
+        -h|--help) usage && exit 1;;
+        -u|--url) OCI_URL=$2; shift 2;;
+        --name) LXC_NAME=$2; shift 2;;
+        --path) LXC_PATH=$2; shift 2;;
+        --rootfs) LXC_ROOTFS=$2; shift 2;;
+        --mapped-uid) LXC_MAPPED_UID=$2; shift 2;;
+        --mapped-gid) LXC_MAPPED_GID=$2; shift 2;;
+        *) break;;
+    esac
+done
+
+# Check that we have all variables we need. The ${VAR:-} form is used because
+# "set -u" would otherwise abort with an "unbound variable" error before we
+# get a chance to print a useful message.
+if [ -z "${LXC_NAME:-}" ] || [ -z "${LXC_PATH:-}" ] || [ -z "${LXC_ROOTFS:-}" ]; then
+    echo "ERROR: Not running through LXC." 1>&2
+    exit 1
+fi
+
+if [ -z "$OCI_URL" ]; then
+    echo "ERROR: no OCI URL given" 1>&2
+    exit 1
+fi
+
+USERNS=$(in_userns)
+
+# NOTE(review): DOWNLOAD_MODE and DOWNLOAD_TARGET are assigned here but not
+# read anywhere else in this script.
+if [ "$USERNS" != "no" ]; then
+    if [ "$USERNS" = "yes" ]; then
+        if [ -z "${LXC_MAPPED_UID:-}" ] || [ "${LXC_MAPPED_UID:-}" = "-1" ]; then
+            echo "ERROR: In a user namespace without a map." 1>&2
+            exit 1
+        fi
+        DOWNLOAD_MODE="user"
+        DOWNLOAD_TARGET="user"
+    else
+        DOWNLOAD_MODE="user"
+        DOWNLOAD_TARGET="system"
+    fi
+fi
+
+# Trap all exit signals. DOWNLOAD_TEMP is given a value first so that the
+# handler never sees it unset under "set -u".
+DOWNLOAD_TEMP=""
+trap cleanup EXIT HUP INT TERM
+
+if ! type mktemp >/dev/null 2>&1; then
+    # No mktemp available: fall back to a PID-based name. Plain mkdir (no -p)
+    # fails if the path already exists, and DOWNLOAD_TEMP is only set once the
+    # directory was really created by us, so cleanup can never "rm -Rf" a
+    # directory or symlink that somebody else planted in /tmp.
+    fallback_temp=/tmp/lxc-oci.$$
+    mkdir -m 0700 "$fallback_temp"
+    DOWNLOAD_TEMP=$fallback_temp
+else
+    DOWNLOAD_TEMP=$(mktemp -d)
+fi
+
+# Download the image - TODO - cache
+skopeo copy "${OCI_URL}" "oci:${DOWNLOAD_TEMP}:latest"
+
+# Unpack the rootfs (paths are quoted so that directories containing
+# whitespace are passed as a single argument)
+echo "Unpacking the rootfs"
+
+oci-image-tool unpack --ref latest "${DOWNLOAD_TEMP}" "${LXC_ROOTFS}"
+
+# Append the container settings to the LXC config file
+LXC_CONF_FILE="${LXC_PATH}/config"
+echo "lxc.init_cmd = /bin/sh" >> "${LXC_CONF_FILE}"
+echo "lxc.mount.auto = proc:mixed sys:mixed cgroup:mixed" >> "${LXC_CONF_FILE}"
+
+echo "lxc.utsname = ${LXC_NAME}" >> "${LXC_CONF_FILE}"
+
+# Best effort: hand config and fstab to the mapped uid/gid when one was given.
+# ${VAR:-} keeps "set -u" quiet when LXC did not pass the option; failures
+# (e.g. a missing fstab) are deliberately ignored.
+if [ -n "${LXC_MAPPED_UID:-}" ] && [ "${LXC_MAPPED_UID:-}" != "-1" ]; then
+    chown "$LXC_MAPPED_UID" "$LXC_PATH/config" "$LXC_PATH/fstab" >/dev/null 2>&1 || true
+fi
+if [ -n "${LXC_MAPPED_GID:-}" ] && [ "${LXC_MAPPED_GID:-}" != "-1" ]; then
+    chgrp "$LXC_MAPPED_GID" "$LXC_PATH/config" "$LXC_PATH/fstab" >/dev/null 2>&1 || true
+fi
+
+exit 0
From 6b7d5edcf677ec41406837525c14a17140013a7d Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge at hallyn.com>
Date: Sat, 5 Aug 2017 14:47:44 -0500
Subject: [PATCH 2/2] attach-ns: work unprivileged.
Also, close the inherited fds after successfully attaching to
them.
This requires quite a few workarounds due to limitations in what
we can do with unprivileged namespaces.
To join another task's network ns, we must join their user_ns (or
a parent thereof, to which we have privilege - we don't currently
do anything to guarantee that exists). Then, as we are not the
parent of the user_ns, we cannot move the new task into the right
cgroups without first entering the same namespace.
This can all certainly be cleaned up and better organized. This is
a first attempt which just makes it work. So now
lxc-start --attach-net someotherpid -n x1
can work unprivileged.
Signed-off-by: Serge Hallyn <serge at hallyn.com>
---
src/lxc/criu.c | 2 +-
src/lxc/namespace.c | 78 +++++++++++++++++++++++++++++++-
src/lxc/namespace.h | 1 +
src/lxc/start.c | 110 +++++++++++++++++++++++++++++++++++++---------
src/lxc/start.h | 2 +-
src/lxc/tools/lxc_start.c | 28 ++++++++++++
6 files changed, 197 insertions(+), 24 deletions(-)
diff --git a/src/lxc/criu.c b/src/lxc/criu.c
index c9384bdd2..47341d822 100644
--- a/src/lxc/criu.c
+++ b/src/lxc/criu.c
@@ -823,7 +823,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
goto out_fini_handler;
}
- resolve_clone_flags(handler);
+ resolve_clone_flags(handler, false); // TODO - fix second argument
if (pipe(pipes) < 0) {
SYSERROR("pipe() failed");
diff --git a/src/lxc/namespace.c b/src/lxc/namespace.c
index 3a5b3bef6..500269561 100644
--- a/src/lxc/namespace.c
+++ b/src/lxc/namespace.c
@@ -24,15 +24,20 @@
#include <unistd.h>
#include <alloca.h>
#include <errno.h>
+#include <sched.h>
#include <signal.h>
+#include <stdlib.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <fcntl.h>
+#include <sys/wait.h>
#include "namespace.h"
#include "log.h"
+int setresuid(uid_t ruid, uid_t euid, uid_t suid);
+int setresgid(gid_t rgid, gid_t egid, gid_t sgid);
+int setns(int fd, int nstype);
+
lxc_log_define(lxc_namespace, lxc);
struct clone_arg {
@@ -69,6 +74,77 @@ pid_t lxc_clone(int (*fn)(void *), void *arg, int flags)
return ret;
}
+/*
+ * Like lxc_clone(), but first attach to an existing user namespace.
+ *
+ * @arg is used as the container's struct lxc_handler; its
+ * conf->inherit_ns_fd[LXC_NS_USER] must hold an open fd for the user
+ * namespace to join, otherwise -1 is returned.
+ *
+ * A helper process is forked which setns()es into that user namespace,
+ * switches to uid/gid 0 there and then clone()s the real child with @flags
+ * minus CLONE_NEWUSER. The pid of the cloned task is handed back to the
+ * caller over a pipe, and the helper is reaped before returning.
+ *
+ * Returns the pid of the cloned task, or -1 on error.
+ *
+ * NOTE(review): the cloned task is a child of the helper, not of the caller,
+ * so the caller presumably cannot waitpid() on the returned pid - confirm
+ * that the callers can live with that.
+ */
+pid_t lxc_clone_special_userns(int (*fn)(void *), void *arg, int flags)
+{
+	struct lxc_handler *handler = arg;
+	struct clone_arg clone_arg = {
+		.fn = fn,
+		.arg = arg,
+	};
+	size_t stack_size;
+	void *stack;
+	pid_t helper, cloned = -1;
+	ssize_t nread;
+	int p[2];
+	int ret;
+
+	if (handler->conf->inherit_ns_fd[LXC_NS_USER] == -1) {
+		ERROR("lxc_clone_special_userns: i shouldn't have been called");
+		return -1;
+	}
+	if (pipe(p) < 0)
+		return -1;
+
+	helper = fork();
+	if (helper < 0) {
+		/* Do not leak the pipe when we could not fork. */
+		close(p[0]);
+		close(p[1]);
+		return -1;
+	}
+	if (helper > 0) {
+		close(p[1]);
+		nread = read(p[0], &cloned, sizeof(cloned));
+		close(p[0]);
+
+		/* Reap the helper so that it does not linger as a zombie. */
+		do {
+			ret = waitpid(helper, NULL, 0);
+		} while (ret < 0 && errno == EINTR);
+
+		if (nread != sizeof(cloned))
+			return -1;
+		return cloned;
+	}
+
+	/*
+	 * Helper process. It must never return into the caller's code, and it
+	 * leaves via _exit() so that it does not run the parent's atexit
+	 * handlers or flush the parent's stdio buffers a second time.
+	 */
+	close(p[0]);
+
+	ret = setns(handler->conf->inherit_ns_fd[LXC_NS_USER], 0);
+	if (ret < 0) {
+		ERROR("Failed setting requested existing userns");
+		_exit(1);
+	}
+	ret = setresgid(0, 0, 0);
+	if (ret < 0) {
+		ERROR("Failed setting gid to container 0");
+		_exit(1);
+	}
+	ret = setresuid(0, 0, 0);
+	if (ret < 0) {
+		ERROR("Failed setting uid to container 0");
+		_exit(1);
+	}
+
+	/* Only the helper clones, so only the helper needs the child stack. */
+	stack_size = sysconf(_SC_PAGESIZE);
+	stack = alloca(stack_size);
+
+	/* We already joined the requested user namespace via setns(). */
+	flags &= ~CLONE_NEWUSER;
+
+	close(handler->conf->inherit_ns_fd[LXC_NS_USER]);
+	handler->conf->inherit_ns_fd[LXC_NS_USER] = -1;
+#ifdef __ia64__
+	cloned = __clone2(do_clone, stack,
+			  stack_size, flags | SIGCHLD, &clone_arg);
+#else
+	cloned = clone(do_clone, stack + stack_size, flags | SIGCHLD, &clone_arg);
+#endif
+	if (cloned < 0)
+		ERROR("Failed to clone (%#x): %s.", flags, strerror(errno));
+
+	/* Report the result (the pid, or -1 on clone failure) to the caller. */
+	if (write(p[1], &cloned, sizeof(cloned)) != sizeof(cloned))
+		_exit(1);
+	_exit(0);
+}
+
/* Leave the user namespace at the first position in the array of structs so
* that we always attach to it first when iterating over the struct and using
* setns() to switch namespaces. This especially affects lxc_attach(): Suppose
diff --git a/src/lxc/namespace.h b/src/lxc/namespace.h
index 4916950c1..e58877448 100644
--- a/src/lxc/namespace.h
+++ b/src/lxc/namespace.h
@@ -80,6 +80,7 @@ int clone(int (*fn)(void *), void *child_stack,
#endif
extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags);
+extern pid_t lxc_clone_special_userns(int (*fn)(void *), void *arg, int flags);
extern int lxc_namespace_2_cloneflag(char *namespace);
extern int lxc_fill_namespace_flags(char *flaglist, int *flags);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index a360f784c..7e7ba52b8 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -168,20 +168,27 @@ static bool preserve_ns(int ns_fd[LXC_NS_MAX], int clone_flags, pid_t pid)
}
+/*
+ * Call setns() on every non-negative descriptor in @ns_fd, in array order.
+ * Returns 0 when all setns() calls succeeded, -1 as soon as one fails (the
+ * failing namespace is logged).
+ *
+ * NOTE(review): the cleanup loop below only calls close() when
+ * ns_fd[i] < 0, i.e. on invalid descriptors, and leaves the valid ones open.
+ * The commit message says the fds should be closed after attaching, so the
+ * test looks inverted. Do not simply flip it: lxc_spawn() and do_start()
+ * both call attach_ns() on handler->conf->inherit_ns_fd, and do_start()
+ * afterwards compares inherit_ns_fd[LXC_NS_NET] against -1, so closing the
+ * fds here needs a coordinated change in those callers.
+ */
static int attach_ns(const int ns_fd[LXC_NS_MAX]) {
- int i;
+ int i, ret = -1;
for (i = 0; i < LXC_NS_MAX; i++) {
if (ns_fd[i] < 0)
continue;
+ INFO("Attaching to %s namespace.", ns_info[i].proc_name);
if (setns(ns_fd[i], 0) != 0)
goto error;
}
- return 0;
+ ret = 0;
error:
- SYSERROR("Failed to attach %s namespace.", ns_info[i].proc_name);
- return -1;
+ /* Reached on success too; ret tells the two cases apart. */
+ if (ret)
+ SYSERROR("Failed to attach %s namespace.", ns_info[i].proc_name);
+
+ for (i = 0; i < LXC_NS_MAX; i++) {
+ if (ns_fd[i] < 0)
+ close(ns_fd[i]);
+ }
+ return ret;
}
static int match_fd(int fd)
@@ -859,10 +866,14 @@ static int do_start(void *data)
if (lxc_sync_wait_parent(handler, LXC_SYNC_STARTUP))
return -1;
+ if (attach_ns(handler->conf->inherit_ns_fd) < 0)
+ return -1;
+
/* Unshare CLONE_NEWNET after CLONE_NEWUSER. See
* https://github.com/lxc/lxd/issues/1978.
*/
- if ((handler->clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
+ if (handler->conf->inherit_ns_fd[LXC_NS_NET] == -1 &&
+ (handler->clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
(CLONE_NEWNET | CLONE_NEWUSER)) {
ret = unshare(CLONE_NEWNET);
if (ret < 0) {
@@ -1168,10 +1179,18 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
return ret;
}
-void resolve_clone_flags(struct lxc_handler *handler)
+void resolve_clone_flags(struct lxc_handler *handler, bool wants_to_map_ids)
{
handler->clone_flags = CLONE_NEWPID | CLONE_NEWNS;
+ if (wants_to_map_ids) {
+ handler->clone_flags |= CLONE_NEWUSER | CLONE_NEWIPC | \
+ CLONE_NEWUTS;
+ if (!lxc_requests_empty_network(handler))
+ handler->clone_flags |= CLONE_NEWNET;
+ return ;
+ }
+
if (!lxc_list_empty(&handler->conf->id_map))
handler->clone_flags |= CLONE_NEWUSER;
@@ -1193,6 +1212,38 @@ void resolve_clone_flags(struct lxc_handler *handler)
INFO("Inheriting a UTS namespace.");
}
+/*
+ * Move the container into its cgroups from inside the user namespace
+ * referred to by @ufd.
+ *
+ * A helper is forked which joins that user namespace, switches to uid/gid 0
+ * there and calls cgroup_enter(@handler); the caller waits for the helper.
+ *
+ * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
+ * helper. The caller treats any non-zero value as failure, which is why this
+ * returns int rather than bool (a bool would suggest "true means success").
+ */
+static int enter_cgroup_in_ns(int ufd, struct lxc_handler *handler)
+{
+	pid_t pid;
+	int ret;
+
+	INFO("switching to %d user_ns to set cgroups", handler->pid);
+	pid = fork();
+	if (pid < 0)
+		return -1;
+	if (pid > 0)
+		return wait_for_pid(pid);
+
+	/*
+	 * Helper process: leave via _exit() so that the parent's atexit
+	 * handlers and stdio buffers are not processed a second time.
+	 */
+	ret = setns(ufd, CLONE_NEWUSER);
+	if (ret) {
+		SYSERROR("Failed to switch to ns to enter cgroup");
+		_exit(1);
+	}
+	ret = setresgid(0, 0, 0);
+	if (ret < 0) {
+		ERROR("Failed setting gid to container 0");
+		_exit(1);
+	}
+	ret = setresuid(0, 0, 0);
+	if (ret < 0) {
+		ERROR("Failed setting uid to container 0");
+		_exit(1);
+	}
+	if (!cgroup_enter(handler))
+		_exit(1);
+	_exit(0);
+}
+
/* lxc_spawn() performs crucial setup tasks and clone()s the new process which
* exec()s the requested container binary.
* Note that lxc_spawn() runs in the parent namespaces. Any operations performed
@@ -1207,7 +1258,7 @@ static int lxc_spawn(struct lxc_handler *handler)
bool cgroups_connected = false;
int saved_ns_fd[LXC_NS_MAX];
int preserve_mask = 0, i, flags;
- int netpipepair[2], nveths;
+ int netpipepair[2], nveths, joined_unpriv_userns = -1;
bool wants_to_map_ids;
struct lxc_list *id_map;
@@ -1215,9 +1266,11 @@ static int lxc_spawn(struct lxc_handler *handler)
id_map = &handler->conf->id_map;
wants_to_map_ids = !lxc_list_empty(id_map);
- for (i = 0; i < LXC_NS_MAX; i++)
- if (handler->conf->inherit_ns_fd[i] != -1)
- preserve_mask |= ns_info[i].clone_flag;
+ if (!wants_to_map_ids) {
+ for (i = 0; i < LXC_NS_MAX; i++)
+ if (handler->conf->inherit_ns_fd[i] != -1)
+ preserve_mask |= ns_info[i].clone_flag;
+ }
if (lxc_sync_init(handler))
return -1;
@@ -1227,7 +1280,7 @@ static int lxc_spawn(struct lxc_handler *handler)
return -1;
}
- resolve_clone_flags(handler);
+ resolve_clone_flags(handler, wants_to_map_ids);
if (handler->clone_flags & CLONE_NEWNET) {
if (!lxc_list_empty(&handler->conf->network)) {
@@ -1281,10 +1334,10 @@ static int lxc_spawn(struct lxc_handler *handler)
INFO("Failed to pin the rootfs for container \"%s\".", handler->name);
}
- if (!preserve_ns(saved_ns_fd, preserve_mask, getpid()))
+ if (!wants_to_map_ids && !preserve_ns(saved_ns_fd, preserve_mask, getpid()))
goto out_delete_net;
- if (attach_ns(handler->conf->inherit_ns_fd) < 0)
+ if (!wants_to_map_ids && attach_ns(handler->conf->inherit_ns_fd) < 0)
goto out_delete_net;
if (am_unpriv() && (nveths = count_veths(&handler->conf->network))) {
@@ -1306,7 +1359,12 @@ static int lxc_spawn(struct lxc_handler *handler)
*/
flags &= ~CLONE_NEWNET;
}
- handler->pid = lxc_clone(do_start, handler, flags);
+ if (wants_to_map_ids && handler->conf->inherit_ns_fd[LXC_NS_USER] != -1) {
+ handler->pid = lxc_clone_special_userns(do_start, handler, flags);
+ joined_unpriv_userns = handler->conf->inherit_ns_fd[LXC_NS_USER];
+ handler->conf->inherit_ns_fd[LXC_NS_USER] = -1;
+ } else
+ handler->pid = lxc_clone(do_start, handler, flags);
if (handler->pid < 0) {
SYSERROR("Failed to clone a new set of namespaces.");
goto out_delete_net;
@@ -1318,7 +1376,7 @@ static int lxc_spawn(struct lxc_handler *handler)
if (!preserve_ns(handler->nsfd, handler->clone_flags | preserve_mask, handler->pid))
INFO("Failed to preserve namespace for lxc.hook.stop.");
- if (attach_ns(saved_ns_fd))
+ if (!wants_to_map_ids && attach_ns(saved_ns_fd))
WARN("Failed to restore saved namespaces.");
lxc_sync_fini_child(handler);
@@ -1329,7 +1387,7 @@ static int lxc_spawn(struct lxc_handler *handler)
* mapped to something else on the host.) later to become a valid uid
* again.
*/
- if (wants_to_map_ids && lxc_map_ids(id_map, handler->pid)) {
+ if (joined_unpriv_userns == -1 && wants_to_map_ids && lxc_map_ids(id_map, handler->pid)) {
ERROR("Failed to set up id mapping.");
goto out_delete_net;
}
@@ -1353,11 +1411,17 @@ static int lxc_spawn(struct lxc_handler *handler)
goto out_delete_net;
}
- if (!cgroup_enter(handler))
- goto out_delete_net;
-
- if (!cgroup_chown(handler))
- goto out_delete_net;
+ if (joined_unpriv_userns != -1) {
+ if (!cgroup_chown(handler))
+ goto out_delete_net;
+ if (enter_cgroup_in_ns(joined_unpriv_userns, handler))
+ goto out_delete_net;
+ } else {
+ if (!cgroup_enter(handler))
+ goto out_delete_net;
+ if (!cgroup_chown(handler))
+ goto out_delete_net;
+ }
if (failed_before_rename)
goto out_delete_net;
@@ -1437,10 +1501,14 @@ static int lxc_spawn(struct lxc_handler *handler)
lxc_sync_fini(handler);
handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
+ if (joined_unpriv_userns != -1)
+ close(joined_unpriv_userns);
return 0;
out_delete_net:
+ if (joined_unpriv_userns != -1)
+ close(joined_unpriv_userns);
if (cgroups_connected)
cgroup_disconnect();
if (handler->clone_flags & CLONE_NEWNET)
diff --git a/src/lxc/start.h b/src/lxc/start.h
index d8d06cfbf..c40c733d1 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -92,6 +92,6 @@ extern int lxc_check_inherited(struct lxc_conf *conf, bool closeall,
int __lxc_start(const char *, struct lxc_handler *, struct lxc_operations *,
void *, const char *, bool);
-extern void resolve_clone_flags(struct lxc_handler *handler);
+extern void resolve_clone_flags(struct lxc_handler *handler, bool wants_to_map_ids);
#endif
diff --git a/src/lxc/tools/lxc_start.c b/src/lxc/tools/lxc_start.c
index ef45ffb08..c0f928e45 100644
--- a/src/lxc/tools/lxc_start.c
+++ b/src/lxc/tools/lxc_start.c
@@ -356,6 +356,34 @@ int main(int argc, char *argv[])
goto out;
conf->inherit_ns_fd[i] = fd;
}
+ if (!lxc_list_empty(&conf->id_map) && conf->inherit_ns_fd[LXC_NS_USER] == -1) {
+ /*
+ * If an unpriv user wants to share a netns, he can only do so
+ * if he is privileged toward the userns which owns the netns. So
+ * we have to enter the userns as well, first. Note - this means
+ * that if the user asks for --share-net=X --share-ipc=Y and X and Y
+ * have different owning user namespaces, this will likely fail. We
+ * could make the rare case of shared common ancestor work, but it's
+ * not worth it. After all, no one has noticed that this was completely
+ * broken for unpriv users for years.
+ */
+ for (i = 0; i < LXC_NS_MAX; i++) {
+ if (i == LXC_NS_USER)
+ continue;
+ if (conf->inherit_ns_fd[i] == -1)
+ continue;
+ // we need to inherit userns as well
+ int pid = pid_from_lxcname(my_args.share_ns[i], lxcpath);
+ if (pid < 1)
+ goto out;
+ int fd = open_ns(pid, "user");
+ if (fd < 0)
+ goto out;
+ INFO("XXX Setting user ns in inherit_ns_fd");
+ conf->inherit_ns_fd[LXC_NS_USER] = fd;
+ break;
+ }
+ }
if (!my_args.daemonize) {
c->want_daemonize(c, false);
More information about the lxc-devel
mailing list