[Lxc-users] [PATCH] Cgroup cleanups: play more nicely with others, and support nesting
Derek Simkowiak
derek at simkowiak.net
Fri Dec 9 20:50:35 UTC 2011
Serge,
Could you please elaborate on this comment?
(Of course, the containers must be on a different subnet)
Do you mean a TCP/IP subnet? If so, why does this limitation exist?
I would like to use nested LXC containers for reselling CPU, disk,
and network at a data center. (I.e., my customers re-sell their CPU,
disk, and network to their customers by using nested LXC containers.)
In that scenario, all LXC containers (incl. nested ones) would need
to be on the same subnet (because that's how the data center sells
cabinets).
Thanks,
Derek
On 12/09/2011 12:43 PM, Serge Hallyn wrote:
> Summary:
>
> With this patch, I can start a container 'o1' inside another container 'o1'.
> (Of course, the containers must be on a different subnet)
>
> Detail:
>
> 1. Create cgroups for containers under /lxc.
>
> 2. Support nested lxc: respect init's cgroup:
>
> Create cgroups under init's cgroup. So if we start a container c2
> inside a container 'c1', we'll use /sys/fs/cgroup/freezer/lxc/c1/lxc/c2
> instead of /sys/fs/cgroup/freezer/c2. This allows a container c1
> to be created inside container c1. It also allows a container's limits
> to be enforced on all a container's children (which a MAC policy could
> already enforce, in which case current lxc code would be unable to nest
> altogether).
>
> 3. Finally, if a container's cgroup already exists, rename it rather than
> failing to start the container. Try to WARN the user so they might go
> clean the old cgroup up.
>
> Whereas without this patch, container o1's cgroup would be
> /sys/fs/cgroup/<subsys>/o1,
> it now becomes
> /sys/fs/cgroup/<subsys>/<initcgroup>/lxc/o1
> so if init is in cgroup '/' then o1's freezer cgroup would be:
> /sys/fs/cgroup/freezer/lxc/o1
>
> Signed-off-by: Serge Hallyn <serge.hallyn at canonical.com>
> ---
> src/lxc/cgroup.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++-------
> 1 files changed, 129 insertions(+), 18 deletions(-)
>
> diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
> index a2b823e..8d3b245 100644
> --- a/src/lxc/cgroup.c
> +++ b/src/lxc/cgroup.c
> @@ -52,9 +52,65 @@ enum {
> CGROUP_CLONE_CHILDREN,
> };
>
> +/*
> + * get_init_cgroup: get the cgroup init is in.
> + * dsg: preallocated buffer to put the output in
> + * subsystem: the exact cgroup subsystem to look up
> + * mntent: a mntent (from getmntent) whose mntopts contains the
> + * subsystem to look up.
> + *
> + * subsystem and mntent can both be NULL, in which case we return
> + * the first entry in /proc/1/cgroup.
> + *
> + * Returns a pointer to the answer, which may be "".
> + */
> +static char *get_init_cgroup(const char *subsystem, struct mntent *mntent,
> + char *dsg)
> +{
> + FILE *f;
> + char *c, *c2;
> + char line[MAXPATHLEN];
> +
> + *dsg = '\0';
> + f = fopen("/proc/1/cgroup", "r");
> + if (!f)
> + return dsg;
> +
> + while (fgets(line, MAXPATHLEN, f)) {
> + c = index(line, ':');
> + if (!c)
> + continue;
> + c++;
> + c2 = index(c, ':');
> + if (!c2)
> + continue;
> + *c2 = '\0';
> + c2++;
> + if (!subsystem&& !mntent)
> + goto good;
> + if (subsystem&& strcmp(c, subsystem) != 0)
> + continue;
> + if (mntent&& !hasmntopt(mntent, c))
> + continue;
> +good:
> + DEBUG("get_init_cgroup: found init cgroup for subsys %s at %s\n",
> + subsystem, c2);
> + strncpy(dsg, c2, MAXPATHLEN);
> + c =&dsg[strlen(dsg)-1];
> + if (*c == '\n')
> + *c = '\0';
> + goto found;
> + }
> +
> +found:
> + fclose(f);
> + return dsg;
> +}
> +
> static int get_cgroup_mount(const char *subsystem, char *mnt)
> {
> struct mntent *mntent;
> + char initcgroup[MAXPATHLEN];
> FILE *file = NULL;
>
> file = setmntent(MTAB, "r");
> @@ -68,13 +124,18 @@ static int get_cgroup_mount(const char *subsystem, char *mnt)
> if (strcmp(mntent->mnt_type, "cgroup"))
> continue;
> if (!subsystem || hasmntopt(mntent, subsystem)) {
> - strcpy(mnt, mntent->mnt_dir);
> + int ret;
> + ret = snprintf(mnt, MAXPATHLEN, "%s%s/lxc", mntent->mnt_dir,
> + get_init_cgroup(subsystem, NULL, initcgroup));
> + if (ret< 0 || ret>= MAXPATHLEN)
> + goto fail;
> fclose(file);
> DEBUG("using cgroup mounted at '%s'", mnt);
> return 0;
> }
> };
>
> +fail:
> DEBUG("Failed to find cgroup for %s\n", subsystem ? subsystem : "(NULL)");
>
> fclose(file);
> @@ -166,26 +227,71 @@ static int cgroup_attach(const char *path, pid_t pid)
> }
>
> /*
> + * rename cgname, which is under cgparent, to a new name starting
> + * with 'cgparent/dead'. That way cgname can be reused. Return
> + * 0 on success, -1 on failure.
> + */
> +int try_to_move_cgname(char *cgparent, char *cgname)
> +{
> + char *newdir;
> +
> + /* tempnam problems don't matter here - cgroupfs will prevent
> + * duplicates if we race, and we'll just fail at that (unlikely)
> + * point
> + */
> +
> + newdir = tempnam(cgparent, "dead");
> + if (!newdir)
> + return -1;
> + if (rename(cgname, newdir))
> + return -1;
> + WARN("non-empty cgroup %s renamed to %s, please manually inspect it\n",
> + cgname, newdir);
> +
> + return 0;
> +}
> +
> +/*
> * create a cgroup for the container in a particular subsystem.
> - * XXX TODO we will of course want to use cgroup_path{subsystem}/lxc/name,
> - * not just cgroup_path{subsystem}/name.
> */
> static int lxc_one_cgroup_create(const char *name,
> struct mntent *mntent, pid_t pid)
> {
> - char cgname[MAXPATHLEN];
> + char cgname[MAXPATHLEN], cgparent[MAXPATHLEN];
> char clonechild[MAXPATHLEN];
> - int flags;
> + char initcgroup[MAXPATHLEN];
> + int flags, ret;
> +
> + /* cgparent is the parent dir, /sys/fs/cgroup/<init-cgroup>/lxc */
> + /* (remember get_init_cgroup() returns a path starting with '/') */
> + /* cgname is the full name, /sys/fs/cgroup/<init-cgroup>/lxc/name */
> + ret = snprintf(cgparent, MAXPATHLEN, "%s%s/lxc", mntent->mnt_dir,
> + get_init_cgroup(NULL, mntent, initcgroup));
> + if (ret< 0 || ret>= MAXPATHLEN) {
> + SYSERROR("Failed creating pathname for cgroup parent (%d)\n", ret);
> + return -1;
> + }
> + ret = snprintf(cgname, MAXPATHLEN, "%s/%s", cgparent, name);
> + if (ret< 0 || ret>= MAXPATHLEN) {
> + SYSERROR("Failed creating pathname for cgroup (%d)\n", ret);
> + return -1;
> + }
>
> - snprintf(cgname, MAXPATHLEN, "%s/%s", mntent->mnt_dir, name);
> + /* if /sys/fs/cgroup/<init-cgroup>/lxc does not exist, create it */
> + if (access(cgparent, F_OK)&& mkdir(cgparent, 0700)) {
> + SYSERROR("failed to create '%s' directory", cgparent);
> + return -1;
> + }
>
> /*
> - * There is a previous cgroup, assume it is empty,
> - * otherwise that fails
> + * There is a previous cgroup. Try to delete it. If that fails
> + * (i.e. it is not empty) try to move it out of the way.
> */
> if (!access(cgname, F_OK)&& rmdir(cgname)) {
> - SYSERROR("failed to remove previous cgroup '%s'", cgname);
> - return -1;
> + if (try_to_move_cgname(cgparent, cgname)) {
> + SYSERROR("failed to remove previous cgroup '%s'", cgname);
> + return -1;
> + }
> }
>
> flags = get_cgroup_flags(mntent);
> @@ -193,11 +299,14 @@ static int lxc_one_cgroup_create(const char *name,
> /* We have the deprecated ns_cgroup subsystem */
> if (flags& CGROUP_NS_CGROUP) {
> WARN("using deprecated ns_cgroup");
> - return cgroup_rename_nsgroup(mntent->mnt_dir, cgname, pid);
> + return cgroup_rename_nsgroup(cgparent, cgname, pid);
> }
>
> - snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children",
> - mntent->mnt_dir);
> + ret = snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children", cgparent);
> + if (ret< 0 || ret>= MAXPATHLEN) {
> + SYSERROR("Failed creating pathname for clone_children (%d)\n", ret);
> + return -1;
> + }
>
> /* we check if the kernel has clone_children, at this point if there
> * no clone_children neither ns_cgroup, that means the cgroup is mounted
> @@ -266,11 +375,14 @@ out:
> }
>
>
> -int lxc_one_cgroup_destroy(const char *cgmnt, const char *name)
> +int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name)
> {
> - char cgname[MAXPATHLEN];
> + char cgname[MAXPATHLEN], initcgroup[MAXPATHLEN];
> + char *cgmnt = mntent->mnt_dir;
>
> - snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, name);
> + snprintf(cgname, MAXPATHLEN, "%s%s/lxc/%s", cgmnt,
> + get_init_cgroup(NULL, mntent, initcgroup), name);
> + DEBUG("destroying %s\n", cgname);
> if (rmdir(cgname)) {
> SYSERROR("failed to remove cgroup '%s'", cgname);
> return -1;
> @@ -298,8 +410,7 @@ int lxc_cgroup_destroy(const char *name)
>
> while ((mntent = getmntent(file))) {
> if (!strcmp(mntent->mnt_type, "cgroup")) {
> - DEBUG("destroying %s %s\n", mntent->mnt_dir, name);
> - ret = lxc_one_cgroup_destroy(mntent->mnt_dir, name);
> + ret = lxc_one_cgroup_destroy(mntent, name);
> if (ret) {
> fclose(file);
> return ret;
More information about the lxc-users
mailing list