[Lxc-users] [PATCH 2/2] cgroups: support cgroups mounted in multiple places
Michael H. Warfield
mhw at WittsEnd.com
Fri Jun 24 18:32:11 UTC 2011
On Fri, 2011-06-24 at 12:54 -0500, Serge Hallyn wrote:
> I.e. with systemd or libcgroup.
> To do this, instead of looking for one cgroup called 'lxc' or
> otherwise taking the first cgroup we find, we actually create a
> container in every mounted cgroup fs. Right now it's done under the
> root of each fs. We may want to put that under lxc, or, better yet,
> make that configurable.
Yeah, looking at my system and just looking for the devices.* entries, I
see some interesting stuff there that libvirt is doing...
[root at forest mhw]# find /sys/fs/cgroup -name devices\*
/sys/fs/cgroup/devices
/sys/fs/cgroup/devices/libvirt/lxc/devices.list
/sys/fs/cgroup/devices/libvirt/lxc/devices.deny
/sys/fs/cgroup/devices/libvirt/lxc/devices.allow
/sys/fs/cgroup/devices/libvirt/qemu/devices.list
/sys/fs/cgroup/devices/libvirt/qemu/devices.deny
/sys/fs/cgroup/devices/libvirt/qemu/devices.allow
/sys/fs/cgroup/devices/libvirt/devices.list
/sys/fs/cgroup/devices/libvirt/devices.deny
/sys/fs/cgroup/devices/libvirt/devices.allow
/sys/fs/cgroup/devices/devices.list
/sys/fs/cgroup/devices/devices.deny
/sys/fs/cgroup/devices/devices.allow
Granted that the "lxc" under libvert is not this "lxc" project and they
have their own xml based stuff, still looks like they're stuffing things
under another directory. Good question there.
> Note the use of clone_children seems not quite right, but that's
> not for this patch to fix. In particular, if clone_children is
> not in the mntopts, we reject it. Yet later we try to set it
> ourselves. I believe we should simply, if ns cgroup is not
> composed, always try to set clone_children to 1. As it stands,
> with libcgroup installed, I had to do
> cd /sys/fs/cgroup
> for d in `/bin/ls`; do
> echo 1 > $d/cgroup.clone_children
> done
> But after that, 'lxc-start -n l1' worked like a charm. It also
> continues to work with a single mount of cgroups under /cgroup.
Cool. I'll check this out. Haven't had much time for coding, here the
last couple of weeks, but I can at least do some testing.
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
> ---
> src/lxc/cgroup.c | 207 +++++++++++++++++++++++++++++++++--------------------
> src/lxc/cgroup.h | 2 +-
> src/lxc/freezer.c | 2 +-
> src/lxc/lxc.h | 8 +-
> src/lxc/state.c | 2 +-
> 5 files changed, 135 insertions(+), 86 deletions(-)
>
> diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
> index a068a01..ecba56e 100644
> --- a/src/lxc/cgroup.c
> +++ b/src/lxc/cgroup.c
> @@ -52,11 +52,10 @@ enum {
> CGROUP_CLONE_CHILDREN,
> };
>
> -static int get_cgroup_mount(const char *mtab, char *mnt)
> +static int get_cgroup_mount(const char *mtab, const char *subsystem, char *mnt)
> {
> struct mntent *mntent;
> FILE *file = NULL;
> - int err = -1;
>
> file = setmntent(mtab, "r");
> if (!file) {
> @@ -66,29 +65,24 @@ static int get_cgroup_mount(const char *mtab, char *mnt)
>
> while ((mntent = getmntent(file))) {
>
> - /* there is a cgroup mounted named "lxc" */
> - if (!strcmp(mntent->mnt_fsname, "lxc") &&
> - !strcmp(mntent->mnt_type, "cgroup")) {
> - strcpy(mnt, mntent->mnt_dir);
> - err = 0;
> - break;
> - }
> -
> - /* fallback to the first non-lxc cgroup found */
> - if (!strcmp(mntent->mnt_type, "cgroup") && err) {
> + if (strcmp(mntent->mnt_type, "cgroup"))
> + continue;
> + if (!subsystem || hasmntopt(mntent, subsystem)) {
> strcpy(mnt, mntent->mnt_dir);
> - err = 0;
> + fclose(file);
> + DEBUG("using cgroup mounted at '%s'", mnt);
> + return 0;
> }
> };
>
> - DEBUG("using cgroup mounted at '%s'", mnt);
> + DEBUG("Failed to find cgroup for %s\n", subsystem ? subsystem : "(NULL)");
>
> fclose(file);
>
> - return err;
> + return -1;
> }
>
> -static int get_cgroup_flags(const char *mtab, int *flags)
> +static int get_cgroup_flags(const char *mtab, const char *mnt_dir, int *flags)
> {
> struct mntent *mntent;
> FILE *file = NULL;
> @@ -103,38 +97,24 @@ static int get_cgroup_flags(const char *mtab, int *flags)
> *flags = 0;
>
> while ((mntent = getmntent(file))) {
> -
> - /* there is a cgroup mounted named "lxc" */
> - if (!strcmp(mntent->mnt_fsname, "lxc") &&
> - !strcmp(mntent->mnt_type, "cgroup")) {
> -
> - if (hasmntopt(mntent, "ns"))
> - *flags |= CGROUP_NS_CGROUP;
> -
> - if (hasmntopt(mntent, "clone_children"))
> - *flags |= CGROUP_CLONE_CHILDREN;
> -
> + if (strcmp(mntent->mnt_type, "cgroup"))
> + continue;
> + if (strcmp(mntent->mnt_dir, mnt_dir))
> + continue;
> + if (hasmntopt(mntent, "ns")) {
> + *flags |= CGROUP_NS_CGROUP;
> err = 0;
> - break;
> }
> -
> - /* fallback to the first non-lxc cgroup found */
> - if (!strcmp(mntent->mnt_type, "cgroup") && err) {
> -
> - if (hasmntopt(mntent, "ns"))
> - *flags |= CGROUP_NS_CGROUP;
> -
> - if (hasmntopt(mntent, "clone_children"))
> - *flags |= CGROUP_CLONE_CHILDREN;
> -
> + if (hasmntopt(mntent, "clone_children")) {
> + *flags |= CGROUP_CLONE_CHILDREN;
> err = 0;
> }
> - };
> -
> - DEBUG("cgroup flags is 0x%x", *flags);
> -
> - fclose(file);
> + fclose(file);
> + DEBUG("cgroup flags for %s is 0x%x", mnt_dir, *flags);
> + return err;
> + }
>
> + fclose(file);
> return err;
> }
>
> @@ -199,18 +179,17 @@ static int cgroup_attach(const char *path, pid_t pid)
> return ret;
> }
>
> -int lxc_cgroup_create(const char *name, pid_t pid)
> +/*
> + * create a cgroup for the container in a particular subsystem.
> + * XXX TODO we will of course want to use cgroup_path{subsystem}/lxc/name,
> + * not just cgroup_path{subsystem}/name.
> + */
> +int lxc_one_cgroup_create(const char *name, const char *cgmnt, pid_t pid)
> {
> - char cgmnt[MAXPATHLEN];
> char cgname[MAXPATHLEN];
> char clonechild[MAXPATHLEN];
> int flags;
>
> - if (get_cgroup_mount(MTAB, cgmnt)) {
> - ERROR("cgroup is not mounted");
> - return -1;
> - }
> -
> snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, name);
>
> /*
> @@ -222,7 +201,7 @@ int lxc_cgroup_create(const char *name, pid_t pid)
> return -1;
> }
>
> - if (get_cgroup_flags(MTAB, &flags)) {
> + if (get_cgroup_flags(MTAB, cgmnt, &flags)) {
> SYSERROR("failed to get cgroup flags");
> return -1;
> }
> @@ -266,16 +245,44 @@ int lxc_cgroup_create(const char *name, pid_t pid)
> return 0;
> }
>
> -int lxc_cgroup_destroy(const char *name)
> +/*
> + * for each mounted cgroup, create a cgroup for the container
> + */
> +int lxc_cgroup_create(const char *name, pid_t pid)
> {
> - char cgmnt[MAXPATHLEN];
> - char cgname[MAXPATHLEN];
> + struct mntent *mntent;
> + FILE *file = NULL;
> + int ret, err = -1;
>
> - if (get_cgroup_mount(MTAB, cgmnt)) {
> - ERROR("cgroup is not mounted");
> + DEBUG("%s: starting\n", __func__);
> + file = setmntent(MTAB, "r");
> + if (!file) {
> + SYSERROR("failed to open %s", MTAB);
> return -1;
> }
>
> + while ((mntent = getmntent(file))) {
> + if (!strcmp(mntent->mnt_type, "cgroup")) {
> + DEBUG("creating %s %s %d\n", name, mntent->mnt_dir, pid);
> + err = lxc_one_cgroup_create(name, mntent->mnt_dir, pid);
> + if (ret) {
> + fclose(file);
> + return ret;
> + }
> + err = 0;
> + }
> + };
> +
> + fclose(file);
> +
> + return err;
> +}
> +
> +
> +int lxc_one_cgroup_destroy(const char *cgmnt, const char *name)
> +{
> + char cgname[MAXPATHLEN];
> +
> snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, name);
> if (rmdir(cgname)) {
> SYSERROR("failed to remove cgroup '%s'", cgname);
> @@ -287,37 +294,79 @@ int lxc_cgroup_destroy(const char *name)
> return 0;
> }
>
> -int lxc_cgroup_path_get(char **path, const char *name)
> +/*
> + * for each mounted cgroup, destroy the cgroup for the container
> + */
> +int lxc_cgroup_destroy(const char *name)
> {
> - static char cgroup[MAXPATHLEN];
> - static const char* cgroup_cached = 0;
> - static char buf[MAXPATHLEN];
> + struct mntent *mntent;
> + FILE *file = NULL;
> + int ret, err = -1;
>
> - if (!cgroup_cached) {
> - if (get_cgroup_mount(MTAB, cgroup)) {
> - ERROR("cgroup is not mounted");
> - return -1;
> - } else {
> - cgroup_cached = cgroup;
> + file = setmntent(MTAB, "r");
> + if (!file) {
> + SYSERROR("failed to open %s", MTAB);
> + return -1;
> + }
> +
> + while ((mntent = getmntent(file))) {
> + if (!strcmp(mntent->mnt_type, "cgroup")) {
> + DEBUG("destroying %s %s\n", mntent->mnt_dir, name);
> + ret = lxc_one_cgroup_destroy(mntent->mnt_dir, name);
> + if (ret) {
> + fclose(file);
> + return ret;
> + }
> + err = 0;
> }
> + }
> +
> + fclose(file);
> +
> + return err;
> +}
> +/*
> + * lxc_cgroup_path_get: put into *path the pathname for
> + * %subsystem and cgroup %name. If %subsystem is NULL, then
> + * the first mounted cgroup will be used (for nr_tasks)
> + */
> +int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name)
> +{
> + static char buf[MAXPATHLEN];
> + static char retbuf[MAXPATHLEN];
> +
> + /* what lxc_cgroup_set calls subsystem is actually the filename, i.e.
> + 'devices.allow'. So for our purposee we trim it */
> + if (subsystem) {
> + snprintf(retbuf, MAXPATHLEN, "%s", subsystem);
> + char *s = index(retbuf, '.');
> + *s = '\0';
> + DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, name);
> }
> + if (get_cgroup_mount(MTAB, subsystem ? retbuf : NULL, buf)) {
> + ERROR("cgroup is not mounted");
> + return -1;
> + }
> +
> + snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, name);
> +
> + DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem);
>
> - snprintf(buf, MAXPATHLEN, "%s/%s", cgroup_cached, name);
> - *path = buf;
> + *path = retbuf;
> return 0;
> }
>
> -int lxc_cgroup_set(const char *name, const char *subsystem, const char *value)
> +int lxc_cgroup_set(const char *name, const char *filename, const char *value)
> {
> int fd, ret;
> - char *nsgroup;
> + char *dirpath;
> char path[MAXPATHLEN];
>
> - ret = lxc_cgroup_path_get(&nsgroup, name);
> + ret = lxc_cgroup_path_get(&dirpath, filename, name);
> if (ret)
> return -1;
>
> - snprintf(path, MAXPATHLEN, "%s/%s", nsgroup, subsystem);
> + snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
>
> fd = open(path, O_WRONLY);
> if (fd < 0) {
> @@ -337,18 +386,18 @@ out:
> return ret;
> }
>
> -int lxc_cgroup_get(const char *name, const char *subsystem,
> +int lxc_cgroup_get(const char *name, const char *filename,
> char *value, size_t len)
> {
> int fd, ret = -1;
> - char *nsgroup;
> + char *dirpath;
> char path[MAXPATHLEN];
>
> - ret = lxc_cgroup_path_get(&nsgroup, name);
> + ret = lxc_cgroup_path_get(&dirpath, filename, name);
> if (ret)
> return -1;
>
> - snprintf(path, MAXPATHLEN, "%s/%s", nsgroup, subsystem);
> + snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
>
> fd = open(path, O_RDONLY);
> if (fd < 0) {
> @@ -366,16 +415,16 @@ int lxc_cgroup_get(const char *name, const char *subsystem,
>
> int lxc_cgroup_nrtasks(const char *name)
> {
> - char *nsgroup;
> + char *dpath;
> char path[MAXPATHLEN];
> int pid, ret, count = 0;
> FILE *file;
>
> - ret = lxc_cgroup_path_get(&nsgroup, name);
> + ret = lxc_cgroup_path_get(&dpath, NULL, name);
> if (ret)
> return -1;
>
> - snprintf(path, MAXPATHLEN, "%s/tasks", nsgroup);
> + snprintf(path, MAXPATHLEN, "%s/tasks", dpath);
>
> file = fopen(path, "r");
> if (!file) {
> diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
> index 8607fa8..f6df8c5 100644
> --- a/src/lxc/cgroup.h
> +++ b/src/lxc/cgroup.h
> @@ -28,6 +28,6 @@
> struct lxc_handler;
> int lxc_cgroup_create(const char *name, pid_t pid);
> int lxc_cgroup_destroy(const char *name);
> -int lxc_cgroup_path_get(char **path, const char *name);
> +int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name);
> int lxc_cgroup_nrtasks(const char *name);
> #endif
> diff --git a/src/lxc/freezer.c b/src/lxc/freezer.c
> index 07eede5..94984a3 100644
> --- a/src/lxc/freezer.c
> +++ b/src/lxc/freezer.c
> @@ -45,7 +45,7 @@ static int freeze_unfreeze(const char *name, int freeze)
> char tmpf[32];
> int fd, ret;
>
> - ret = lxc_cgroup_path_get(&nsgroup, name);
> + ret = lxc_cgroup_path_get(&nsgroup, "freezer", name);
> if (ret)
> return -1;
>
> diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h
> index a091baa..04df1c6 100644
> --- a/src/lxc/lxc.h
> +++ b/src/lxc/lxc.h
> @@ -114,22 +114,22 @@ extern lxc_state_t lxc_state(const char *name);
> * Set a specified value for a specified subsystem. The specified
> * subsystem must be fully specified, eg. "cpu.shares"
> * @name : the name of the container
> - * @subsystem : the subsystem
> + * @filename : the cgroup attribute filename
> * @value : the value to be set
> * Returns 0 on success, < 0 otherwise
> */
> -extern int lxc_cgroup_set(const char *name, const char *subsystem, const char *value);
> +extern int lxc_cgroup_set(const char *name, const char *filename, const char *value);
>
> /*
> * Get a specified value for a specified subsystem. The specified
> * subsystem must be fully specified, eg. "cpu.shares"
> * @name : the name of the container
> - * @subsystem : the subsystem
> + * @filename : the cgroup attribute filename
> * @value : the value to be set
> * @len : the len of the value variable
> * Returns the number of bytes read, < 0 on error
> */
> -extern int lxc_cgroup_get(const char *name, const char *subsystem,
> +extern int lxc_cgroup_get(const char *name, const char *filename,
> char *value, size_t len);
>
> /*
> diff --git a/src/lxc/state.c b/src/lxc/state.c
> index 6720011..b435eba 100644
> --- a/src/lxc/state.c
> +++ b/src/lxc/state.c
> @@ -71,7 +71,7 @@ static int freezer_state(const char *name)
> FILE *file;
> int err;
>
> - err = lxc_cgroup_path_get(&nsgroup, name);
> + err = lxc_cgroup_path_get(&nsgroup, "freezer", name);
> if (err)
> return -1;
>
--
Michael H. Warfield (AI4NB) | (770) 985-6132 | mhw at WittsEnd.com
/\/\|=mhw=|\/\/ | (678) 463-0932 | http://www.wittsend.com/mhw/
NIC whois: MHW9 | An optimist believes we live in the best of all
PGP Key: 0x674627FF | possible worlds. A pessimist is sure of it!
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 482 bytes
Desc: This is a digitally signed message part
URL: <http://lists.linuxcontainers.org/pipermail/lxc-users/attachments/20110624/f0a102c3/attachment.pgp>
More information about the lxc-users
mailing list