[lxc-devel] [PATCH 1/2] cgmanager: support lxc.mount.auto = cgroup

Stéphane Graber stgraber at ubuntu.com
Fri Jan 31 09:49:58 UTC 2014


On Thu, Jan 30, 2014 at 02:18:30PM +0000, Serge Hallyn wrote:
> If it (or any variation thereof) is in the container configuration,
> then mount /sys/fs/cgroup/cgmanager.lower (if it exists) or
> /sys/fs/cgroup/cgmanager into the container so it can run a
> cgproxy.
> 
> Also make sure to clear our groups when we start or attach to a
> container.  Else with unprivileged containers we end up with
> lots of nogroups listed in /proc/1/status.
> 
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>

Acked-by: Stéphane Graber <stgraber at ubuntu.com>

As discussed in person, the permissions of /sys/fs/cgroup will be
incorrect with this patch as tmpfs defaults to sticky 777 but Serge will
send a follow-up patch fixing any occurence of wrong tmpfs permissions.

> ---
>  src/lxc/attach.c       | 11 ++++++----
>  src/lxc/bdev.c         |  4 ++++
>  src/lxc/cgmanager.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  src/lxc/cgroup.c       | 25 ++++++++++++----------
>  src/lxc/cgroup.h       | 30 ++++++++++++++------------
>  src/lxc/conf.c         |  4 ++--
>  src/lxc/lxccontainer.c |  3 +++
>  src/lxc/start.c        |  5 +++++
>  src/lxc/utils.c        | 12 +++++++++++
>  src/lxc/utils.h        |  2 ++
>  10 files changed, 123 insertions(+), 31 deletions(-)
> 
> diff --git a/src/lxc/attach.c b/src/lxc/attach.c
> index 8b9bc8d..5c4adcd 100644
> --- a/src/lxc/attach.c
> +++ b/src/lxc/attach.c
> @@ -28,6 +28,7 @@
>  #include <stdlib.h>
>  #include <errno.h>
>  #include <fcntl.h>
> +#include <grp.h>
>  #include <sys/param.h>
>  #include <sys/prctl.h>
>  #include <sys/mount.h>
> @@ -943,10 +944,12 @@ static int attach_child_main(void* data)
>  		new_gid = options->gid;
>  
>  	/* try to set the uid/gid combination */
> -	if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER) && setgid(new_gid)) {
> -		SYSERROR("switching to container gid");
> -		shutdown(ipc_socket, SHUT_RDWR);
> -		rexit(-1);
> +	if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER)) {
> +		if (setgid(new_gid) || setgroups(0, NULL)) {
> +			SYSERROR("switching to container gid");
> +			shutdown(ipc_socket, SHUT_RDWR);
> +			rexit(-1);
> +		}
>  	}
>  	if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) {
>  		SYSERROR("switching to container uid");
> diff --git a/src/lxc/bdev.c b/src/lxc/bdev.c
> index 09b7ca7..072b61c 100644
> --- a/src/lxc/bdev.c
> +++ b/src/lxc/bdev.c
> @@ -31,6 +31,8 @@
>  #include <stdio.h>
>  #include <stdint.h>
>  #include <inttypes.h>
> +#include <sys/types.h>
> +#include <grp.h>
>  #include <unistd.h>
>  #include <errno.h>
>  #include <sched.h>
> @@ -2031,6 +2033,8 @@ static int rsync_rootfs(struct rsync_data *data)
>  		ERROR("Failed to setgid to 0");
>  		return -1;
>  	}
> +	if (setgroups(0, NULL) < 0)
> +		WARN("Failed to clear groups");
>  	if (setuid(0) < 0) {
>  		ERROR("Failed to setuid to 0");
>  		return -1;
> diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c
> index 2097a25..ca6b317 100644
> --- a/src/lxc/cgmanager.c
> +++ b/src/lxc/cgmanager.c
> @@ -30,6 +30,7 @@
>  #include <dirent.h>
>  #include <fcntl.h>
>  #include <ctype.h>
> +#include <grp.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <sys/param.h>
> @@ -56,6 +57,7 @@ lxc_log_define(lxc_cgmanager, lxc);
>  #include <cgmanager-client/cgmanager-client.h>
>  #include <nih/alloc.h>
>  #include <nih/error.h>
> +#include <nih/string.h>
>  NihDBusProxy *cgroup_manager = NULL;
>  
>  extern struct cgroup_ops *active_cg_ops;
> @@ -257,6 +259,8 @@ static int chown_cgroup_wrapper(void *data)
>  		SYSERROR("Failed to setgid to 0");
>  	if (setresuid(0,0,0) < 0)
>  		SYSERROR("Failed to setuid to 0");
> +	if (setgroups(0, NULL) < 0)
> +		SYSERROR("Failed to clear groups");
>  	return do_chown_cgroup(arg->controller, arg->cgroup_path, arg->origuid);
>  }
>  
> @@ -679,6 +683,59 @@ out:
>  	return pass;
>  }
>  
> +static bool cgm_bind_dir(const char *root, const char *dirname)
> +{
> +	nih_local char *cgpath = NULL;
> +
> +	/* /sys should have been mounted by now */
> +	cgpath = NIH_MUST( nih_strdup(NULL, root) );
> +	NIH_MUST( nih_strcat(&cgpath, NULL, "/sys/fs/cgroup") );
> +
> +	if (!dir_exists(cgpath)) {
> +		ERROR("%s does not exist", cgpath);
> +		return false;
> +	}
> +
> +	/* mount a tmpfs there so we can create subdirs */
> +	if (mount("cgroup", cgpath, "tmpfs", 0, "size=10000")) {
> +		SYSERROR("Failed to mount tmpfs at %s", cgpath);
> +		return false;
> +	}
> +	NIH_MUST( nih_strcat(&cgpath, NULL, "/cgmanager") );
> +
> +	if (mkdir(cgpath, 0755) < 0) {
> +		SYSERROR("Failed to create %s", cgpath);
> +		return false;
> +	}
> +
> +	if (mount(dirname, cgpath, "none", MS_BIND, 0)) {
> +		SYSERROR("Failed to bind mount %s to %s", dirname, cgpath);
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * cgm_mount_cgroup:
> + * If /sys/fs/cgroup/cgmanager.lower/ exists, bind mount that to
> + * /sys/fs/cgroup/cgmanager/ in the container.
> + * Otherwise, if /sys/fs/cgroup/cgmanager exists, bind mount that.
> + * Else do nothing
> + */
> +#define CGMANAGER_LOWER_SOCK "/sys/fs/cgroup/cgmanager.lower"
> +#define CGMANAGER_UPPER_SOCK "/sys/fs/cgroup/cgmanager"
> +static bool cgm_mount_cgroup(const char *root,
> +		struct lxc_cgroup_info *cgroup_info, int type)
> +{
> +	if (dir_exists(CGMANAGER_LOWER_SOCK))
> +		return cgm_bind_dir(root, CGMANAGER_LOWER_SOCK);
> +	if (dir_exists(CGMANAGER_UPPER_SOCK))
> +		return cgm_bind_dir(root, CGMANAGER_UPPER_SOCK);
> +	// Host doesn't have cgmanager running?  Then how did we get here?
> +	return false;
> +}
> +
>  static struct cgroup_ops cgmanager_ops = {
>  	.destroy = cgm_destroy,
>  	.init = cgm_init,
> @@ -693,5 +750,6 @@ static struct cgroup_ops cgmanager_ops = {
>  	.name = "cgmanager",
>  	.chown = cgm_chown,
>  	.attach = cgm_attach,
> +	.mount_cgroup = cgm_mount_cgroup,
>  };
>  #endif
> diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
> index 7456413..288fe20 100644
> --- a/src/lxc/cgroup.c
> +++ b/src/lxc/cgroup.c
> @@ -1326,7 +1326,8 @@ char *lxc_cgroup_path_get(const char *filename, const char *name,
>  	return path;
>  }
>  
> -int lxc_setup_mount_cgroup(const char *root, struct lxc_cgroup_info *cgroup_info, int type)
> +static bool cgroupfs_mount_cgroup(const char *root,
> +		struct lxc_cgroup_info *cgroup_info, int type)
>  {
>  	size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
>  	char *path = NULL;
> @@ -1340,28 +1341,23 @@ int lxc_setup_mount_cgroup(const char *root, struct lxc_cgroup_info *cgroup_info
>  
>  	init_cg_ops();
>  
> -	if (strcmp(active_cg_ops->name, "cgmanager") == 0) {
> -		// todo - offer to bind-mount /sys/fs/cgroup/cgmanager/
> -		return 0;
> -	}
> -
>  	cgfs_d = cgroup_info->data;
>  	base_info = cgfs_d->info;
>  
>  	if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
>  		ERROR("could not mount cgroups into container: invalid type specified internally");
>  		errno = EINVAL;
> -		return -1;
> +		return false;
>  	}
>  
>  	path = calloc(1, bufsz);
>  	if (!path)
> -		return -1;
> +		return false;
>  	snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
>  	r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
>  	if (r < 0) {
>  		SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
> -		return -1;
> +		return false;
>  	}
>  
>  	/* now mount all the hierarchies we care about */
> @@ -1501,7 +1497,7 @@ int lxc_setup_mount_cgroup(const char *root, struct lxc_cgroup_info *cgroup_info
>  
>  	free(path);
>  
> -	return 0;
> +	return true;
>  
>  out_error:
>  	saved_errno = errno;
> @@ -1511,7 +1507,7 @@ out_error:
>  	free(abs_path);
>  	free(abs_path2);
>  	errno = saved_errno;
> -	return -1;
> +	return false;
>  }
>  
>  int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
> @@ -2345,6 +2341,7 @@ static struct cgroup_ops cgfs_ops = {
>  	.name = "cgroupfs",
>  	.attach = lxc_cgroupfs_attach,
>  	.chown = NULL,
> +	.mount_cgroup = cgroupfs_mount_cgroup,
>  };
>  static void init_cg_ops(void)
>  {
> @@ -2456,3 +2453,9 @@ bool lxc_cgroup_attach(const char *name, const char *lxcpath, pid_t pid)
>  	init_cg_ops();
>  	return active_cg_ops->attach(name, lxcpath, pid);
>  }
> +
> +bool lxc_setup_mount_cgroup(const char *root,
> +		struct lxc_cgroup_info *cgroup_info, int type)
> +{
> +	return active_cg_ops->mount_cgroup(root, cgroup_info, type);
> +}
> diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
> index a172ffb..45b58d0 100644
> --- a/src/lxc/cgroup.h
> +++ b/src/lxc/cgroup.h
> @@ -169,6 +169,19 @@ extern int do_unfreeze(int freeze, const char *name, const char *lxcpath);
>  extern int freeze_unfreeze(const char *name, int freeze, const char *lxcpath);
>  extern const char *lxc_state2str(lxc_state_t state);
>  extern lxc_state_t freezer_state(const char *name, const char *lxcpath);
> +
> +/*
> + * cgroup-related data for backend use in start/stop of a
> + * container.  This is tacked to the lxc_handler.
> + */
> +struct lxc_cgroup_info {
> +	/* handlers to actually do the cgroup stuff */
> +	struct cgroup_ops *ops;
> +	/* extra data for the cgroup_ops, i.e. mountpoints for fs backend */
> +	void *data;
> +	const char *cgroup_pattern;
> +};
> +
>  /* per-backend cgroup hooks */
>  struct cgroup_ops {
>  	void (*destroy)(struct lxc_handler *handler);
> @@ -183,23 +196,11 @@ struct cgroup_ops {
>  	bool (*setup_limits)(struct lxc_handler *handler, bool with_devices);
>  	bool (*chown)(struct lxc_handler *handler);
>  	bool (*attach)(const char *name, const char *lxcpath, pid_t pid);
> +	bool (*mount_cgroup)(const char *root, struct lxc_cgroup_info *info,
> +			int type);
>  	const char *name;
>  };
>  
> -/*
> - * cgroup-related data for backend use in start/stop of a
> - * container.  This is tacked to the lxc_handler.
> - */
> -struct lxc_cgroup_info {
> -	/* handlers to actually do the cgroup stuff */
> -	struct cgroup_ops *ops;
> -	/* extra data for the cgroup_ops, i.e. mountpoints for fs backend */
> -	void *data;
> -	const char *cgroup_pattern;
> -};
> -
> -extern int lxc_setup_mount_cgroup(const char *root, struct lxc_cgroup_info *base_info, int type);
> -
>  struct cgfs_data {
>  	struct cgroup_meta_data *meta;
>  	struct cgroup_process_info *info;
> @@ -218,6 +219,7 @@ extern void cgroup_cleanup(struct lxc_handler *handler);
>  extern bool cgroup_create_legacy(struct lxc_handler *handler);
>  extern char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem);
>  extern bool lxc_cgroup_attach(const char *name, const char *lxcpath, pid_t pid);
> +extern bool lxc_setup_mount_cgroup(const char *root, struct lxc_cgroup_info *cgroup_info, int type);
>  extern int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath);
>  extern int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
>  extern int lxc_unfreeze_fromhandler(struct lxc_handler *handler);
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 06307b7..180c51e 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -719,8 +719,8 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_cg
>  	}
>  
>  	if (flags & LXC_AUTO_CGROUP_MASK) {
> -		r = lxc_setup_mount_cgroup(conf->rootfs.mount, cgroup_info, flags & LXC_AUTO_CGROUP_MASK);
> -		if (r < 0) {
> +		if (!lxc_setup_mount_cgroup(conf->rootfs.mount, cgroup_info,
> +					flags & LXC_AUTO_CGROUP_MASK)) {
>  			SYSERROR("error mounting /sys/fs/cgroup");
>  			return -1;
>  		}
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index 3862890..854be07 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -34,6 +34,7 @@
>  #include <arpa/inet.h>
>  #include <libgen.h>
>  #include <stdint.h>
> +#include <grp.h>
>  
>  #include <lxc/lxccontainer.h>
>  #include <lxc/version.h>
> @@ -2447,6 +2448,8 @@ static int clone_update_rootfs(struct clone_update_data *data)
>  		ERROR("Failed to setuid to 0");
>  		return -1;
>  	}
> +	if (setgroups(0, NULL) < 0)
> +		WARN("Failed to clear groups");
>  
>  	if (unshare(CLONE_NEWNS) < 0)
>  		return -1;
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 423bdd5..85d27ea 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -32,6 +32,7 @@
>  #include <signal.h>
>  #include <fcntl.h>
>  #include <termios.h>
> +#include <grp.h>
>  #include <poll.h>
>  #include <sys/param.h>
>  #include <sys/file.h>
> @@ -585,6 +586,10 @@ static int do_start(void *data)
>  			SYSERROR("setuid");
>  			goto out_warn_father;
>  		}
> +		if (setgroups(0, NULL)) {
> +			SYSERROR("setgroups");
> +			goto out_warn_father;
> +		}
>  	}
>  
>  	#if HAVE_SYS_CAPABILITY_H
> diff --git a/src/lxc/utils.c b/src/lxc/utils.c
> index 5ab4c77..64b7b82 100644
> --- a/src/lxc/utils.c
> +++ b/src/lxc/utils.c
> @@ -1136,3 +1136,15 @@ found:
>  	free(line);
>  	return nsid;
>  }
> +
> +bool dir_exists(const char *path)
> +{
> +	struct stat sb;
> +	int ret;
> +
> +	ret = stat(path, &sb);
> +	if (ret < 0)
> +		// could be something other than eexist, just say no
> +		return false;
> +	return S_ISDIR(sb.st_mode);
> +}
> diff --git a/src/lxc/utils.h b/src/lxc/utils.h
> index f9037b7..dec6526 100644
> --- a/src/lxc/utils.h
> +++ b/src/lxc/utils.h
> @@ -270,4 +270,6 @@ inline static bool am_unpriv(void) {
>   * parse /proc/self/uid_map to find what @orig maps to
>   */
>  extern uid_t get_ns_uid(uid_t orig);
> +
> +extern bool dir_exists(const char *path);
>  #endif
> -- 
> 1.8.5.3
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20140131/e8dc67d1/attachment.pgp>


More information about the lxc-devel mailing list