[lxc-devel] [PATCH 2/2] RFC: cgroup: start introducing support for cgroup manager

Stéphane Graber stgraber at ubuntu.com
Mon Jan 13 21:43:53 UTC 2014


On Mon, Jan 13, 2014 at 03:33:16PM -0600, Serge Hallyn wrote:
> With this patch all regular cgroup functionality still seems to
> work.  The cgmanager support however doesn't even feign
> completeness and is completely untested.  The patch is to show
> the direction I'm going.  You can see how lxc interacts with cgmanager
> at the lxc_cgmanager_enter() and lxc_cgmanager_create() calls.
> 

As we discussed on IRC, I agree that we should be pushing this even if
it's not complete, so long as it doesn't regress the non-cgmanager case.
It'll be easier to iterate that way and avoid possibly tricky merges
(depending on what other changes are coming in).

I also added one comment below (about configure.ac).

> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
> ---
>  configure.ac        |   9 +++
>  src/lxc/Makefile.am |   9 +++
>  src/lxc/cgroup.c    | 214 ++++++++++++++++++++++++++++++++++++++++++++--------
>  src/lxc/cgroup.h    |   1 +
>  4 files changed, 201 insertions(+), 32 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index e153693..1c74ee3 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -31,6 +31,15 @@ AC_CANONICAL_HOST
>  AM_PROG_CC_C_O
>  AC_GNU_SOURCE
>  
> +PKG_CHECK_MODULES([CGMANAGER], [libcgmanager], [have_cgmanager=yes], [have_cgmanager=no])
> +
> +if test "$have_cgmanager" = yes; then
> +	AC_DEFINE([HAVE_CGMANAGER], [1], [Build with cgmanager support])
> +	PKG_CHECK_MODULES([NIH], [libnih >= 1.0.2])
> +	PKG_CHECK_MODULES([NIH_DBUS], [libnih-dbus >= 1.0.0])
> +	PKG_CHECK_MODULES([DBUS], [dbus-1 >= 1.2.16])
> +fi
> +

This really ought to be an --enable flag (with default=auto, similar to
most other options), and it should also be added to the text summary at
the end of configure.ac.

>  # Detect the distribution. This is used for the default configuration and
>  # for some distro-specific build options.
>  AC_MSG_CHECKING([host distribution])
> diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
> index 1e0232b..01f6026 100644
> --- a/src/lxc/Makefile.am
> +++ b/src/lxc/Makefile.am
> @@ -144,6 +144,11 @@ liblxc_so_LDFLAGS = \
>  
>  liblxc_so_LDADD = $(CAP_LIBS) $(APPARMOR_LIBS) $(SECCOMP_LIBS)
>  
> +#if HAVE_CGMANAGER
> +liblxc_so_LDADD += $(CGMANAGER_LIBS) $(DBUS_LIBS) $(NIH_LIBS) $(NIH_DBUS_LIBS)
> +liblxc_so_CFLAGS += $(CGMANAGER_CFLAGS) $(DBUS_CFLAGS) $(NIH_CFLAGS) $(NIH_DBUS_CFLAGS)
> +#endif
> +
>  bin_SCRIPTS = \
>  	lxc-ps \
>  	lxc-netstat \
> @@ -245,6 +250,10 @@ LDADD=liblxc.so @CAP_LIBS@ @APPARMOR_LIBS@ @SECCOMP_LIBS@
>  lxc_attach_SOURCES = lxc_attach.c
>  lxc_autostart_SOURCES = lxc_autostart.c
>  lxc_cgroup_SOURCES = lxc_cgroup.c
> +#if HAVE_CGMANAGER
> +lxc_cgroup_LDADD = $(CGMANAGER_LIBS) $(DBUS_LIBS) $(NIH_LIBS) $(NIH_DBUS_LIBS) $(LDADD)
> +lxc_cgroup_CFLAGS = $(CGMANAGER_CFLAGS) $(DBUS_CFLAGS) $(NIH_CFLAGS) $(NIH_DBUS_CFLAGS)
> +#endif
>  lxc_checkpoint_SOURCES = lxc_checkpoint.c
>  lxc_config_SOURCES = lxc_config.c
>  lxc_console_SOURCES = lxc_console.c
> diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
> index ff237fe..8a81a12 100644
> --- a/src/lxc/cgroup.c
> +++ b/src/lxc/cgroup.c
> @@ -82,6 +82,103 @@ static int cgroup_recursive_task_count(const char *cgroup_path);
>  static int count_lines(const char *fn);
>  static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
>  
> +#ifdef HAVE_CGMANAGER
> +#include <nih-dbus/dbus_connection.h>
> +#include <cgmanager-client/cgmanager-client.h>
> +/* this needs to be mutexed for api use */
> +static bool cgmanager_initialized = false;
> +static bool use_cgmanager = true;
> +/* move this stuff into cgmanager.c */
> +NihDBusProxy *cgroup_manager = NULL;
> +
> +static bool lxc_init_cgmanager(void);
> +static void cgmanager_disconnected(DBusConnection *connection)
> +{
> +	WARN("Cgroup manager connection was terminated");
> +	cgroup_manager = NULL;
> +	cgmanager_initialized = false;
> +	if (lxc_init_cgmanager()) {
> +		cgmanager_initialized = true;
> +		INFO("New cgroup manager connection was opened");
> +	}
> +}
> +
> +#define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
> +static bool lxc_init_cgmanager(void)
> +{
> +	DBusError dbus_error;
> +	DBusConnection *connection;
> +	dbus_error_init(&dbus_error);
> +
> +	connection = nih_dbus_connect(CGMANAGER_DBUS_SOCK, cgmanager_disconnected);
> +	if (!connection) {
> +		ERROR("Error opening cgmanager connection at %s", CGMANAGER_DBUS_SOCK);
> +		return false;
> +	}
> +	dbus_connection_set_exit_on_disconnect(connection, FALSE);
> +	dbus_error_free(&dbus_error);
> +	cgroup_manager = nih_dbus_proxy_new(NULL, connection,
> +				NULL /* p2p */,
> +				"/org/linuxcontainers/cgmanager", NULL, NULL);
> +	dbus_connection_unref(connection);
> +	if (!cgroup_manager) {
> +		return false;
> +	}
> +	return true;
> +}
> +
> +/*
> + * Use the cgmanager to move a task into a cgroup for a particular
> + * hierarchy.
> + * All the subsystems in this hierarchy are co-mounted, so we only
> + * need to transition the task into one of the cgroups
> + */
> +static bool lxc_cgmanager_enter(pid_t pid, struct cgroup_hierarchy *h, char *cgroup_path)
> +{
> +	char *controller = h->subsystems[0];
> +
> +	if (!cgmanager_initialized) {
> +		if (!lxc_init_cgmanager()) {
> +			ERROR("%s: could not reach cgmanager", __func__);
> +			return -1;
> +		}
> +	}
> +	return cgmanager_move_pid_sync(NULL, cgroup_manager, controller,
> +				       cgroup_path, pid) == 0;
> +}
> +
> +static bool lxc_cgmanager_create(struct cgroup_mount_point *mp, const char *cgroup_path)
> +{
> +	char *controller = mp->hierarchy->subsystems[0];
> +
> +	if (!cgmanager_initialized) {
> +		if (!lxc_init_cgmanager()) {
> +			ERROR("%s: could not reach cgmanager", __func__);
> +			return -1;
> +		}
> +	}
> +	if ( cgmanager_create_sync(NULL, cgroup_manager, controller,
> +				       cgroup_path) != 0) {
> +		ERROR("Failed to create %s:%s", controller, cgroup_path);
> +		return -1;
> +	}
> +
> +	// TODO - try to chown the cgroup to the container root
> +	return 0;
> +}
> +
> +#else
> +static inline bool lxc_cgmanager_enter(pid_t pid, struct cgroup_hierarchy *h, char *cgroup_path)
> +{
> +	return false;
> +}
> +static inline bool lxc_cgmanager_create(struct cgroup_mount_point *mp, const char *cgroup_path)
> +{
> +	return false;
> +}
> +static bool use_cgmanager = false;
> +#endif
> +
>  static int cgroup_rmdir(char *dirname)
>  {
>  	struct dirent dirent, *direntp;
> @@ -498,15 +595,20 @@ struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
>  				all_named_subsystems, subsystem_whitelist))
>  		goto out_error;
>  
> -	if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
> -		goto out_error;
> -
> -	/* oops, we couldn't find anything */
> -	if (!meta_data->hierarchies || !meta_data->mount_points) {
> +	if (!meta_data->hierarchies) {
>  		errno = EINVAL;
>  		goto out_error;
>  	}
>  
> +	if (!use_cgmanager) {
> +		if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
> +			goto out_error;
> +		if (!meta_data->mount_points) {
> +			errno = EINVAL;
> +			goto out_error;
> +		}
> +	}
> +
>  	lxc_free_array((void **)kernel_subsystems, free);
>  	return meta_data;
>  
> @@ -723,7 +825,7 @@ extern struct cgroup_process_info *lxc_cgroup_create(const char *name, const cha
>  	char *path_so_far = NULL;
>  	char **new_cgroup_paths = NULL;
>  	char **new_cgroup_paths_sub = NULL;
> -	struct cgroup_mount_point *mp;
> +	struct cgroup_mount_point *mp = NULL;
>  	struct cgroup_hierarchy *h;
>  	struct cgroup_process_info *base_info = NULL;
>  	struct cgroup_process_info *info_ptr;
> @@ -763,20 +865,22 @@ extern struct cgroup_process_info *lxc_cgroup_create(const char *name, const cha
>  		goto out_initial_error;
>  
>  	/* find mount points we can use */
> -	for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
> -		h = info_ptr->hierarchy;
> -		mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
> -		if (!mp) {
> -			ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
> -			goto out_initial_error;
> -		}
> -		info_ptr->designated_mount_point = mp;
> +	if (!use_cgmanager) {
> +		for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
> +			h = info_ptr->hierarchy;
> +			mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
> +			if (!mp) {
> +				ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
> +				goto out_initial_error;
> +			}
> +			info_ptr->designated_mount_point = mp;
>  
> -		if (lxc_string_in_array("ns", (const char **)h->subsystems))
> -			continue;
> -		if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
> -			ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
> -			goto out_initial_error;
> +			if (lxc_string_in_array("ns", (const char **)h->subsystems))
> +				continue;
> +			if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
> +				ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
> +				goto out_initial_error;
> +			}
>  		}
>  	}
>  
> @@ -974,6 +1078,8 @@ int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *
>  	struct cgroup_process_info *info_ptr;
>  	int r;
>  
> +	if (use_cgmanager)
> +		return 0;
>  	for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
>  		if (!lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
>  			continue;
> @@ -1030,7 +1136,10 @@ struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, cons
>  		/* it is not an error if we don't find anything here,
>  		 * it is up to the caller to decide what to do in that
>  		 * case */
> -		entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
> +		if (use_cgmanager)
> +			entry->designated_mount_point = NULL;
> +		else
> +			entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
>  
>  		*cptr = entry;
>  		cptr = &entry->next;
> @@ -1061,6 +1170,14 @@ int lxc_cgroup_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub
>  			info_ptr->cgroup_path_sub :
>  			info_ptr->cgroup_path;
>  
> +		if (use_cgmanager) {
> +			if (!lxc_cgmanager_enter(pid, info_ptr->hierarchy, cgroup_path)) {
> +				ERROR("Could not add %lu to cgroup %s",
> +						(unsigned long)pid, cgroup_path);
> +				return -1;
> +			}
> +			continue;
> +		}
>  		if (!info_ptr->designated_mount_point) {
>  			info_ptr->designated_mount_point = lxc_cgroup_find_mount_point(info_ptr->hierarchy, cgroup_path, true);
>  			if (!info_ptr->designated_mount_point) {
> @@ -1110,15 +1227,19 @@ void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
>  		return;
>  	next = info->next;
>  	{
> -		struct cgroup_mount_point *mp = info->designated_mount_point;
> -		if (!mp)
> -			mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
> -		if (mp)
> -			/* ignore return value here, perhaps we created the
> -			 * '/lxc' cgroup in this container but another container
> -			 * is still running (for example)
> -			 */
> -			(void)remove_cgroup(mp, info->cgroup_path, true);
> +		if (use_cgmanager) {
> +			(void)remove_cgroup(NULL, info->cgroup_path, true);
> +		} else {
> +			struct cgroup_mount_point *mp = info->designated_mount_point;
> +			if (!mp)
> +				mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
> +			if (mp)
> +				/* ignore return value here, perhaps we created the
> +				 * '/lxc' cgroup in this container but another container
> +				 * is still running (for example)
> +				 */
> +				(void)remove_cgroup(mp, info->cgroup_path, true);
> +		}
>  	}
>  	for (pp = info->created_paths; pp && *pp; pp++);
>  	for ((void)(pp && --pp); info->created_paths && pp >= info->created_paths; --pp) {
> @@ -1151,6 +1272,10 @@ char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lx
>  	struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
>  	if (!info)
>  		return NULL;
> +
> +	if (use_cgmanager)
> +		return cgroup_to_absolute_path(NULL, info->cgroup_path, NULL);
> +
>  	if (info->designated_mount_point) {
>  		mp = info->designated_mount_point;
>  	} else {
> @@ -1342,6 +1467,12 @@ int lxc_setup_mount_cgroup(const char *root, struct cgroup_process_info *base_in
>  		return -1;
>  	}
>  
> +	/* TODO if using cgmanager, then cgroup_auto should mount /sys/fs/cgroup/cgmanager/ */
> +	if (use_cgmanager) {
> +		INFO("Using cgmanager, so not mounting sysfs paths");
> +		goto remount_ro;
> +	}
> +
>  	/* now mount all the hierarchies we care about */
>  	for (info = base_info; info; info = info->next) {
>  		size_t subsystem_count, i;
> @@ -1469,6 +1600,7 @@ int lxc_setup_mount_cgroup(const char *root, struct cgroup_process_info *base_in
>  		parts = NULL;
>  	}
>  
> +remount_ro:
>  	/* try to remount the tmpfs readonly, since the container shouldn't
>  	 * change anything (this will also make sure that trying to create
>  	 * new cgroups outside the allowed area fails with an error instead
> @@ -1681,9 +1813,17 @@ bool is_valid_cgroup(const char *name)
>  	return strcmp(name, ".") != 0 && strcmp(name, "..") != 0;
>  }
>  
> +/* TODO - if in a userns, try to chown the new cgroup to the container root */
>  int create_or_remove_cgroup(bool do_remove, struct cgroup_mount_point *mp, const char *path, int recurse)
>  {
>  	int r, saved_errno = 0;
> +
> +	if (use_cgmanager) {
> +		if (do_remove) // not yet implemented in cgmanager
> +			return 0;
> +		return lxc_cgmanager_create(mp, path) ? 0 : -1;
> +	}
> +
>  	char *buf = cgroup_to_absolute_path(mp, path, NULL);
>  	if (!buf)
>  		return -1;
> @@ -1696,6 +1836,7 @@ int create_or_remove_cgroup(bool do_remove, struct cgroup_mount_point *mp, const
>  			r = rmdir(buf);
>  	} else
>  		r = mkdir(buf, 0777);
> +
>  	saved_errno = errno;
>  	free(buf);
>  	errno = saved_errno;
> @@ -1715,10 +1856,12 @@ int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse)
>  char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix)
>  {
>  	/* first we have to make sure we subtract the mount point's prefix */
> -	char *prefix = mp->mount_prefix;
> +	char *prefix = NULL;
>  	char *buf;
>  	ssize_t len, rv;
>  
> +	if (mp)
> +		prefix = mp->mount_prefix;
>  	/* we want to make sure only absolute paths to cgroups are passed to us */
>  	if (path[0] != '/') {
>  		errno = EINVAL;
> @@ -1742,11 +1885,14 @@ char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, c
>  	/* remove prefix from path */
>  	path += prefix ? strlen(prefix) : 0;
>  
> -	len = strlen(mp->mount_point) + strlen(path) + (suffix ? strlen(suffix) : 0);
> +	len = strlen(path) + (suffix ? strlen(suffix) : 0);
> +	if (mp)
> +		len += strlen(mp->mount_point);
>  	buf = calloc(len + 1, 1);
>  	if (!buf)
>  		return NULL;
> -	rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point, path, suffix ? suffix : "");
> +	rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point ? mp->mount_point : "",
> +				path, suffix ? suffix : "");
>  	if (rv > len) {
>  		free(buf);
>  		errno = ENOMEM;
> @@ -1986,6 +2132,10 @@ int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path)
>  	int r, saved_errno = 0;
>  	char buf[2];
>  
> +	// If using the cgmanager, it will have set these for us
> +	if (!mp)
> +		return 0;
> +
>  	/* If this is the memory cgroup, we want to enforce hierarchy.
>  	 * But don't fail if for some reason we can't.
>  	 */
> diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
> index 3aab12d..6ba83ac 100644
> --- a/src/lxc/cgroup.h
> +++ b/src/lxc/cgroup.h
> @@ -41,6 +41,7 @@ struct cgroup_meta_data {
>  	struct cgroup_hierarchy **hierarchies;
>  	struct cgroup_mount_point **mount_points;
>  	int maximum_hierarchy;
> +	bool use_cgmanager;
>  };
>  
>  /*
> -- 
> 1.8.5.2
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20140113/3a63f699/attachment.pgp>


More information about the lxc-devel mailing list