[cgmanager-devel] [PATCH 1/1] implement release-on-empty

Serge Hallyn serge.hallyn at ubuntu.com
Sat Mar 29 01:25:20 UTC 2014


The new kernel mechanism for automatic removal of empty cgroups
is not yet available, therefore this uses the original
release-agent feature.  The release-agent is only told the
cgroup name being deleted, not the controller.

Cgmanager installs a program cgm-release-agent into /sbin.  For
each controller it then creates a symlink to that program named
cgm-release-agent.$controller under /run/cgmanager/agents and
registers the symlink as that controller's release agent.  The
RemoveOnEmpty dbus call causes 1 to be written to the
notify_on_release file for the specified cgroup, so that when it
becomes empty cgm-release-agent.$controller will be called.

As a part of this the mounting of subsystems is broken up into
collecting the subsystems, then setting release agents, and
finally mounting the subsystems into our private namespace.

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 .gitignore                        |   1 +
 Makefile.am                       |   8 +-
 acinclude.m4                      | 137 +++++++++++++++++++
 cgm                               |  15 ++
 cgm-release-agent.c               |  92 +++++++++++++
 cgmanager-proxy.c                 |  51 +++++++
 cgmanager.c                       |  70 +++++++++-
 configure.ac                      |   2 +
 frontend.c                        |  89 ++++++++++++
 frontend.h                        |   5 +
 fs.c                              | 278 ++++++++++++++++++++++++++++----------
 fs.h                              |   6 +-
 org.linuxcontainers.cgmanager.xml |  10 +-
 tests/test21.sh                   |  34 +++++
 14 files changed, 722 insertions(+), 76 deletions(-)
 create mode 100644 acinclude.m4
 create mode 100644 cgm-release-agent.c
 create mode 100755 tests/test21.sh

diff --git a/.gitignore b/.gitignore
index 0a408cd..7decb58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,4 @@ cgm.1
 tests/*.o
 tests/cgm-concurrent
 00*
+cgm-release-agent
diff --git a/Makefile.am b/Makefile.am
index 5d62ada..8023250 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -7,7 +7,7 @@ VERSION_AGE      = 0
 LIBCGMANAGER_VERSION        = $(VERSION_CURRENT):$(VERSION_REVISION):$(VERSION_AGE)
 LIBCGMANAGER_VERSION_DOTTED = $(VERSION_CURRENT).$(VERSION_REVISION).$(VERSION_AGE)
 
-AM_CFLAGS = -Wall -ggdb -D_GNU_SOURCE
+AM_CFLAGS = -Wall -ggdb -D_GNU_SOURCE -DSBINDIR=\"$(SBINDIR)\"
 AM_CFLAGS += $(DBUS_CFLAGS) $(NIH_CFLAGS) $(NIH_DBUS_CFLAGS)
 AM_LDFLAGS = $(DBUS_LIBS) $(NIH_LIBS) $(NIH_DBUS_LIBS)
 client_files_OUTPUTS = cgmanager-client.c cgmanager-client.h
@@ -19,10 +19,11 @@ CLEANFILES = \
 	$(manager_files_OUTPUTS) \
 	$(client_files_OUTPUTS) \
 	cgmanager cgproxy \
+	cgm-release-agent  \
 	*.o *.so \
 	libcgmanager.pc.in
 
-sbin_PROGRAMS = cgmanager cgproxy
+sbin_PROGRAMS = cgmanager cgproxy cgm-release-agent
 
 bin_SCRIPTS = cgm
 
@@ -72,6 +73,9 @@ cgproxy_SOURCES = cgmanager-proxy.c \
 	fs.c fs.h cgmanager.h \
 	frontend.c frontend.h
 
+cgm_release_agent_SOURCES = cgm-release-agent.c
+cgm_release_agent_LDADD = -lcgmanager
+
 libcgmanager_la_SOURCES = \
 	$(client_files_OUTPUTS) \
 	org.linuxcontainers.cgmanager.xml
diff --git a/acinclude.m4 b/acinclude.m4
new file mode 100644
index 0000000..d718b5e
--- /dev/null
+++ b/acinclude.m4
@@ -0,0 +1,137 @@
+dnl as-ac-expand.m4 0.2.0
+dnl autostars m4 macro for expanding directories using configure's prefix
+dnl thomas at apestaart.org
+dnl
+
+dnl AS_AC_EXPAND(VAR, CONFIGURE_VAR)
+dnl example
+dnl AS_AC_EXPAND(SYSCONFDIR, $sysconfdir)
+dnl will set SYSCONFDIR to /usr/local/etc if prefix=/usr/local
+
+AC_DEFUN([AS_AC_EXPAND],
+[
+    EXP_VAR=[$1]
+    FROM_VAR=[$2]
+
+    dnl first expand prefix and exec_prefix if necessary
+    prefix_save=$prefix
+    exec_prefix_save=$exec_prefix
+
+    dnl if no prefix given, then use /usr/local, the default prefix
+    if test "x$prefix" = "xNONE"; then
+        prefix="$ac_default_prefix"
+    fi
+    dnl if no exec_prefix given, then use prefix
+    if test "x$exec_prefix" = "xNONE"; then
+        exec_prefix=$prefix
+    fi
+
+    full_var="$FROM_VAR"
+    dnl loop until it doesn't change anymore
+    while true; do
+        new_full_var="`eval echo $full_var`"
+        if test "x$new_full_var" = "x$full_var"; then break; fi
+        full_var=$new_full_var
+    done
+
+    dnl clean up
+    full_var=$new_full_var
+    AC_SUBST([$1], "$full_var")
+
+    dnl restore prefix and exec_prefix
+    prefix=$prefix_save
+    exec_prefix=$exec_prefix_save
+])
+
+dnl Available from the GNU Autoconf Macro Archive at:
+dnl http://www.gnu.org/software/ac-archive/htmldoc/ax_compare_version.html
+AC_DEFUN([AX_COMPARE_VERSION], [
+# Used to indicate true or false condition
+ax_compare_version=false
+	  # Convert the two version strings to be compared into a format that
+  # allows a simple string comparison.  The end result is that a version
+  # string of the form 1.12.5-r617 will be converted to the form
+  # 0001001200050617.  In other words, each number is zero padded to four
+  # digits, and non digits are removed.
+  AS_VAR_PUSHDEF([A],[ax_compare_version_A])
+  A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
+                     -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/[[^0-9]]//g'`
+
+  AS_VAR_PUSHDEF([B],[ax_compare_version_B])
+  B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
+                     -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/[[^0-9]]//g'`
+
+  dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary
+  dnl # then the first line is used to determine if the condition is true.
+  dnl # The sed right after the echo is to remove any indented white space.
+  m4_case(m4_tolower($2),
+  [lt],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"`
+  ],
+  [gt],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"`
+  ],
+  [le],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"`
+  ],
+  [ge],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"`
+  ],[
+    dnl Split the operator from the subversion count if present.
+    m4_bmatch(m4_substr($2,2),
+    [0],[
+      # A count of zero means use the length of the shorter version.
+      # Determine the number of characters in A and B.
+      ax_compare_version_len_A=`echo "$A" | awk '{print(length)}'`
+      ax_compare_version_len_B=`echo "$B" | awk '{print(length)}'`
+
+      # Set A to no more than B's length and B to no more than A's length.
+      A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"`
+      B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"`
+    ],
+    [[0-9]+],[
+      # A count greater than zero means use only that many subversions
+      A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
+      B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
+    ],
+    [.+],[
+      AC_WARNING(
+        [illegal OP numeric parameter: $2])
+    ],[])
+
+    # Pad zeros at end of numbers to make same length.
+    ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`"
+    B="$B`echo $A | sed 's/./0/g'`"
+    A="$ax_compare_version_tmp_A"
+
+    # Check for equality or inequality as necessary.
+    m4_case(m4_tolower(m4_substr($2,0,2)),
+    [eq],[
+      test "x$A" = "x$B" && ax_compare_version=true
+    ],
+    [ne],[
+      test "x$A" != "x$B" && ax_compare_version=true
+    ],[
+      AC_WARNING([illegal OP parameter: $2])
+    ])
+  ])
+
+  AS_VAR_POPDEF([A])dnl
+  AS_VAR_POPDEF([B])dnl
+
+  dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE.
+  if test "$ax_compare_version" = "true" ; then
+    m4_ifvaln([$4],[$4],[:])dnl
+    m4_ifvaln([$5],[else $5])dnl
+  fi
+]) dnl AX_COMPARE_VERSION
diff --git a/cgm b/cgm
index 423cbc5..71e80cd 100755
--- a/cgm
+++ b/cgm
@@ -45,6 +45,8 @@ usage() {
 	echo
 	echo "   $me listchildren <controller> <cgroup>"
 	echo
+	echo "   $me removeonempty <controller> <cgroup>"
+	echo
 	echo "   $me apiversion"
 	echo ""
 	echo " Replace '<controller>' with the desired controller, i.e."
@@ -168,6 +170,19 @@ case "$1" in
 		dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.ListChildren string:$2 string:$3
 		exit $?
 	;;
+	removeonempty)
+		if [ $# -lt 3 ]; then
+			usage "$0"
+		fi
+		if [ "$2" = "all" ]; then
+			for cg in $(awk '!/^#/ { print $1 }' /proc/cgroups); do
+				dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:"$cg" string:"$3"
+			done
+		else
+			dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:"$2" string:"$3"
+		fi
+		exit $?
+	;;
 	apiversion)
 		dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock /org/linuxcontainers/cgmanager org.freedesktop.DBus.Properties.Get string:'org.linuxcontainers.cgmanager0_0' string:'api_version'
 		exit $?
diff --git a/cgm-release-agent.c b/cgm-release-agent.c
new file mode 100644
index 0000000..b834465
--- /dev/null
+++ b/cgm-release-agent.c
@@ -0,0 +1,92 @@
+/* cgmanager
+ *
+ * Copyright © 2014 Canonical
+ * Author: Serge Hallyn <serge.hallyn at ubuntu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "cgmanager.h"
+#include "cgmanager-client.h"
+
+#include <nih-dbus/dbus_connection.h>
+#include "cgmanager-client.h"
+#include <nih/alloc.h>
+#include <nih/error.h>
+#include <nih/logging.h>
+#include <nih/string.h>
+
+#define CG_REMOVE_RECURSIVE 1
+
+int do_remove_cgroup(const char *controller, const char *cgroup)
+{
+	DBusError dbus_error;
+	DBusConnection *connection;
+	dbus_error_init(&dbus_error);
+	NihDBusProxy *cgroup_manager = NULL;
+
+	connection = dbus_connection_open_private(CGMANAGER_DBUS_PATH, &dbus_error);
+	if (!connection) {
+		nih_error("Failed opening dbus connection: %s: %s",
+				dbus_error.name, dbus_error.message);
+		dbus_error_free(&dbus_error);
+		return -1;
+	}
+	if (nih_dbus_setup(connection, NULL) < 0) {
+		NihError *nerr;
+		nerr = nih_error_get();
+		nih_free(nerr);
+		dbus_error_free(&dbus_error);
+		dbus_connection_unref(connection);
+		return -1;
+	}
+	dbus_error_free(&dbus_error);
+	cgroup_manager = nih_dbus_proxy_new(NULL, connection,
+				NULL /* p2p */,
+				"/org/linuxcontainers/cgmanager", NULL, NULL);
+	dbus_connection_unref(connection);
+	if (!cgroup_manager) {
+		NihError *nerr;
+		nerr = nih_error_get();
+		nih_free(nerr);
+		return -1;
+	}
+
+	int existed;
+	if ( cgmanager_remove_sync(NULL, cgroup_manager, controller,
+				   cgroup, CG_REMOVE_RECURSIVE, &existed) != 0) {
+		NihError *nerr;
+		nerr = nih_error_get();
+		nih_free(nerr);
+	}
+	nih_free(cgroup_manager);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	char *p;
+
+	nih_assert (argv[1] != NULL);
+
+	p = strstr(argv[0], ".");
+	if (!p)
+		return -1;
+
+	/* controller is now in *(p+1), cgroup is in argv[1] */
+	return do_remove_cgroup(p+1, argv[1]);
+}
diff --git a/cgmanager-proxy.c b/cgmanager-proxy.c
index d6e2113..49f6343 100644
--- a/cgmanager-proxy.c
+++ b/cgmanager-proxy.c
@@ -837,6 +837,57 @@ out:
 	return ret;
 }
 
+int remove_on_empty_main (const char *controller, const char *cgroup,
+		struct ucred p, struct ucred r)
+{
+	DBusMessage *message;
+	DBusMessageIter iter;
+	int sv[2], ret = -1;
+	char buf[1];
+
+	if (memcmp(&p, &r, sizeof(struct ucred)) != 0) {
+		nih_error("%s: proxy != requestor", __func__);
+		return -1;
+	}
+
+	if (!sane_cgroup(cgroup)) {
+		nih_error("%s: unsafe cgroup", __func__);
+		return -1;
+	}
+
+	if (!(message = start_dbus_request("RemoveOnEmptyScm", sv))) {
+		nih_error("%s: error starting dbus request", __func__);
+		return -1;
+	}
+
+	dbus_message_iter_init_append(message, &iter);
+	if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_STRING, &controller)) {
+		nih_error("%s: out of memory", __func__);
+		goto out;
+	}
+	if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_STRING, &cgroup)) {
+		nih_error("%s: out of memory", __func__);
+		goto out;
+	}
+	if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_UNIX_FD, &sv[1])) {
+		nih_error("%s: out of memory", __func__);
+		goto out;
+	}
+
+	if (!complete_dbus_request(message, sv, &r, NULL)) {
+		nih_error("%s: error completing dbus request", __func__);
+		goto out;
+	}
+
+	if (recv(sv[0], buf, 1, 0) == 1 && (*buf == '1'))
+		ret = 0;
+out:
+	close(sv[0]);
+	close(sv[1]);
+	return ret;
+}
+
+
 /**
  * options:
  *
diff --git a/cgmanager.c b/cgmanager.c
index 520e3b0..a532f47 100644
--- a/cgmanager.c
+++ b/cgmanager.c
@@ -666,6 +666,58 @@ int list_children_main(void *parent, const char *controller, const char *cgroup,
 	return get_child_directories(parent, path, output);
 }
 
+int remove_on_empty_main(const char *controller, const char *cgroup,
+		struct ucred p, struct ucred r)
+{
+	char rcgpath[MAXPATHLEN];
+	size_t cgroup_len;
+	nih_local char *working = NULL, *wcgroup = NULL;
+
+	if (!sane_cgroup(cgroup)) {
+		nih_error("%s: unsafe cgroup", __func__);
+		return -1;
+	}
+
+	// Get r's current cgroup in rcgpath
+	if (!compute_pid_cgroup(r.pid, controller, "", rcgpath, NULL)) {
+		nih_error("%s: Could not determine the requested cgroup", __func__);
+		return -1;
+	}
+
+	cgroup_len = strlen(cgroup);
+
+	if (strlen(rcgpath) + cgroup_len > MAXPATHLEN) {
+		nih_error("%s: Path name too long", __func__);
+		return -1;
+	}
+
+	wcgroup = NIH_MUST( nih_strdup(NULL, cgroup) );
+	if (!normalize_path(wcgroup))
+		return -1;
+
+	working = NIH_MUST( nih_strdup(NULL, rcgpath) );
+	NIH_MUST( nih_strcat(&working, NULL, "/") );
+	NIH_MUST( nih_strcat(&working, NULL, wcgroup) );
+
+	if (!dir_exists(working)) {
+		return -1;
+	}
+	// must have write access
+	if (!may_access(r.pid, r.uid, r.gid, working, O_WRONLY)) {
+		nih_error("%s: pid %d (%u:%u) may not remove %s", __func__,
+			r.pid, r.uid, r.gid, working);
+		return -1;
+	}
+
+	NIH_MUST( nih_strcat(&working, NULL, "/notify_on_release") );
+
+	if (!set_value_trusted(working, "1\n")) {
+		nih_error("Failed to set remove_on_empty for %s:%s", controller, working);
+		return -1;
+	}
+
+	return 0;
+}
 char *extra_cgroup_mounts;
 
 static int
@@ -783,7 +835,23 @@ main (int argc, char *argv[])
 				  client_disconnect);
 	nih_assert (server != NULL);
 
-	if (setup_cgroup_mounts(extra_cgroup_mounts) < 0) {
+	if (!setup_base_run_path()) {
+		nih_fatal("Error setting up base cgroup path");
+		return -1;
+	}
+
+	if (collect_subsystems(extra_cgroup_mounts) < 0)
+	{
+		nih_fatal("failed to collect cgroup subsystems");
+		exit(1);
+	}
+
+	if (!create_agent_symlinks()) {
+		nih_fatal("Error creating release agent symlinks");
+		exit(1);
+	}
+
+	if (setup_cgroup_mounts() < 0) {
 		nih_fatal ("Failed to set up cgroup mounts");
 		exit(1);
 	}
diff --git a/configure.ac b/configure.ac
index ca458d1..8ddfa52 100644
--- a/configure.ac
+++ b/configure.ac
@@ -22,4 +22,6 @@ PKG_CHECK_MODULES([DBUS], [dbus-1 >= 1.2.16])
 
 AC_PATH_PROG(HELP2MAN, help2man, false // No help2man //)
 
+AS_AC_EXPAND(SBINDIR, "$sbindir")
+
 AC_OUTPUT
diff --git a/frontend.c b/frontend.c
index 42b913e..1b04fe8 100644
--- a/frontend.c
+++ b/frontend.c
@@ -76,6 +76,7 @@ static struct scm_sock_data *alloc_scm_sock_data(NihDBusMessage *message,
 	return d;
 }
 
+#if 0
 static const char *req_type_to_str(enum req_type r)
 {
 	switch(r) {
@@ -90,9 +91,11 @@ static const char *req_type_to_str(enum req_type r)
 		case REQ_TYPE_GET_TASKS: return "get_tasks";
 		case REQ_TYPE_CHMOD: return "chmod";
 		case REQ_TYPE_LIST_CHILDREN: return "list_children";
+		case REQ_TYPE_REMOVE_ON_EMPTY: return "remove_on_empty";
 		default: return "invalid";
 	}
 }
+#endif
 
 /*
  * All Scm-enhanced transactions take at least one SCM cred,
@@ -188,6 +191,7 @@ static void sock_scm_reader(struct scm_sock_data *data,
 	case REQ_TYPE_REMOVE: remove_scm_complete(data); break;
 	case REQ_TYPE_GET_TASKS: get_tasks_scm_complete(data); break;
 	case REQ_TYPE_LIST_CHILDREN: list_children_scm_complete(data); break;
+	case REQ_TYPE_REMOVE_ON_EMPTY: remove_on_empty_scm_complete(data); break;
 	default:
 		nih_fatal("%s: bad req_type %d", __func__, data->type);
 		exit(1);
@@ -1304,6 +1308,91 @@ int cgmanager_list_children (void *data, NihDBusMessage *message,
 	return ret;
 }
 
+void remove_on_empty_scm_complete(struct scm_sock_data *data)
+{
+	char b = '0';
+
+	if (remove_on_empty_main(data->controller, data->cgroup, data->pcred,
+				data->rcred) == 0)
+		b = '1';
+	if (write(data->fd, &b, 1) < 0)
+		nih_error("RemoveOnEmptyScm: Error writing final result to client");
+}
+
+int cgmanager_remove_on_empty_scm (void *data, NihDBusMessage *message,
+		 const char *controller, const char *cgroup, int sockfd)
+{
+	struct scm_sock_data *d;
+
+	d = alloc_scm_sock_data(message, sockfd, REQ_TYPE_REMOVE_ON_EMPTY);
+	if (!d)
+		return -1;
+	d->controller = NIH_MUST( nih_strdup(d, controller) );
+	d->cgroup = NIH_MUST( nih_strdup(d, cgroup) );
+
+	if (!nih_io_reopen(NULL, sockfd, NIH_IO_MESSAGE,
+				(NihIoReader) sock_scm_reader,
+				(NihIoCloseHandler) scm_sock_close,
+				scm_sock_error_handler, d)) {
+		NihError *error = nih_error_steal ();
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"Failed queue scm message: %s", error->message);
+		nih_free(error);
+		return -1;
+	}
+	if (!kick_fd_client(sockfd)) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"Error writing to client: %s", strerror(errno));
+		return -1;
+	}
+	return 0;
+}
+
+/* 
+ * This is one of the dbus callbacks.
+ * Caller requests that cgroup @cgroup in controller @controller be
+ * marked to be removed when it becomes empty, meaning there are no
+ * more sub-cgroups and no tasks.
+ */
+int cgmanager_remove_on_empty (void *data, NihDBusMessage *message,
+		const char *controller, const char *cgroup)
+{
+	int fd = 0, ret;
+	struct ucred rcred;
+	socklen_t len;
+
+	if (message == NULL) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"message was null");
+		return -1;
+	}
+
+	if (!dbus_connection_get_socket(message->connection, &fd)) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+					     "Could not get client socket.");
+		return -1;
+	}
+
+	len = sizeof(struct ucred);
+	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &rcred, &len) < 0) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+					     "Could not get peer cred: %s",
+					     strerror(errno));
+		return -1;
+	}
+
+	nih_info (_("RemoveOnEmpty: Client fd is: %d (pid=%d, uid=%u, gid=%u)"),
+			fd, rcred.pid, rcred.uid, rcred.gid);
+
+	ret = remove_on_empty_main(controller, cgroup, rcred, rcred);
+	if (ret >= 0)
+		ret = 0;
+	else
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+					     "invalid request");
+	return ret;
+}
+
 int
 cgmanager_get_api_version(void *data, NihDBusMessage *message, int *version)
 {
diff --git a/frontend.h b/frontend.h
index 935231f..0c6c1f6 100644
--- a/frontend.h
+++ b/frontend.h
@@ -103,6 +103,7 @@ enum req_type {
 	REQ_TYPE_CHMOD,
 	REQ_TYPE_MOVE_PID_ABS,
 	REQ_TYPE_LIST_CHILDREN,
+	REQ_TYPE_REMOVE_ON_EMPTY,
 };
 
 int get_pid_cgroup_main(void *parent, const char *controller,
@@ -141,6 +142,10 @@ int list_children_main (void *parent, const char *controller, const char *cgroup
 		struct ucred p, struct ucred r, char ***output);
 void list_children_scm_complete(struct scm_sock_data *data);
 
+int remove_on_empty_main (const char *controller, const char *cgroup,
+		struct ucred p, struct ucred r);
+void remove_on_empty_scm_complete(struct scm_sock_data *data);
+
 int cgmanager_ping (void *data, NihDBusMessage *message, int junk);
 
 int client_connect (DBusServer *server, DBusConnection *conn);
diff --git a/fs.c b/fs.c
index e4bccbd..c245964 100644
--- a/fs.c
+++ b/fs.c
@@ -49,16 +49,39 @@
 #include <nih-dbus/dbus_connection.h>
 #include <nih-dbus/dbus_proxy.h>
 
+/* defines relating to the release agent */
+#define AGENT SBINDIR "/cgm-release-agent"
+#define AGENT_LINK_PATH "/run/cgmanager/agents"
+
 struct controller_mounts {
 	char *controller;
 	char *options;
 	char *path;
+	char *src;
+	char *agent;
 };
 
 static struct controller_mounts *all_mounts;
 static int num_controllers;
 
 static char *base_path;
+
+bool file_exists(const char *path)
+{
+	struct stat sb;
+	if (stat(path, &sb) < 0)
+		return false;
+	return true;
+}
+
+bool dir_exists(const char *path)
+{
+	struct stat sb;
+	if (stat(path, &sb) < 0 || !S_ISDIR(sb.st_mode))
+		return false;
+	return true;
+}
+
 /*
  * Where do we want to mount the controllers?  We used to mount
  * them under a tmpfs under /sys/fs/cgroup, for all to share.  Now
@@ -66,7 +89,7 @@ static char *base_path;
  * TODO read this from configuration file too
  * TODO do we want to create these in a tmpfs?
  */
-static bool setup_base_path(void)
+bool setup_base_run_path(void)
 {
 	base_path = strdup("/run/cgmanager/fs");
 	if (!base_path) {
@@ -85,6 +108,10 @@ static bool setup_base_path(void)
 		nih_fatal("%s: failed to create /run/cgmanager/fs", __func__);
 		return false;
 	}
+	if (mkdir(AGENT_LINK_PATH, 0755) < 0 && errno != EEXIST) {
+		nih_fatal("%s: failed to create %s", __func__, AGENT_LINK_PATH);
+		return false;
+	}
 	return true;
 }
 
@@ -128,7 +155,7 @@ static void set_use_hierarchy(const char *path)
 	fclose(f);
 }
 
-static bool do_mount_subsys(char *s)
+static bool save_mount_subsys(char *s)
 {
 	struct controller_mounts *tmp;
 	char *src, dest[MAXPATHLEN], *controller;
@@ -157,15 +184,6 @@ static bool do_mount_subsys(char *s)
 		ret = -1;
 		goto out;
 	}
-	if (mkdir(dest, 0755) < 0 && errno != EEXIST) {
-		nih_fatal("Failed to create %s: %s", dest, strerror(errno));
-		ret = -1;
-		goto out;
-	}
-	if ((ret = mount(src, dest, "cgroup", 0, src)) < 0) {
-		nih_fatal("Failed mounting %s: %s", s, strerror(errno));
-		goto out;
-	}
 	ret = -1;
 	tmp = realloc(all_mounts, (num_controllers+1) * sizeof(*all_mounts));
 	if (!tmp) {
@@ -180,58 +198,99 @@ static bool do_mount_subsys(char *s)
 	}
 	all_mounts[num_controllers].options = NULL;
 	all_mounts[num_controllers].path = strdup(dest);
-	if (!all_mounts[num_controllers].path) {
+	all_mounts[num_controllers].src = strdup(src);
+	if (!all_mounts[num_controllers].path ||
+			!all_mounts[num_controllers].src) {
 		nih_fatal("Out of memory mounting controllers");
 		goto out;
 	}
 	nih_info(_("Mounted %s onto %s"),
 			all_mounts[num_controllers].controller,
 			all_mounts[num_controllers].path);
-	if (strcmp(all_mounts[num_controllers].controller, "cpuset") == 0) {
+	num_controllers++;
+	return true;
+
+out:
+	return false;
+}
+
+static bool set_release_agent(struct controller_mounts *m)
+{
+	FILE *f;
+	char path[MAXPATHLEN];
+	int ret;
+
+	ret = snprintf(path, MAXPATHLEN, "%s/release_agent", m->path);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		nih_error("out of memory");
+		return false;
+	}
+	if ((f = fopen(path, "w")) == NULL) {
+		nih_error("failed to open %s for writing", path);
+		return false;
+	}
+	if (fprintf(f, "%s\n", m->agent) < 0) {
+		nih_error("failed to set release agent for %s",
+				m->controller);
+		fclose(f);
+		return false;
+	}
+	if (fclose(f) != 0) {
+		nih_error("failed to set release agent for %s",
+				m->controller);
+		return false;
+	}
+	return true;
+}
+
+static bool do_mount_subsys(int i)
+{
+	char *src, *dest, *controller;
+	int ret;
+
+	dest = all_mounts[i].path;
+	controller = all_mounts[i].controller;
+	src = all_mounts[i].src;
+
+	if (mkdir(dest, 0755) < 0 && errno != EEXIST) {
+		nih_fatal("Failed to create %s: %s", dest, strerror(errno));
+		ret = -1;
+		goto out;
+	}
+	if ((ret = mount(src, dest, "cgroup", 0, src)) < 0) {
+		nih_fatal("Failed mounting %s: %s", dest, strerror(errno));
+		goto out;
+	}
+	nih_info(_("Mounted %s onto %s"), controller, dest);
+	if (strcmp(controller, "cpuset") == 0) {
 		set_clone_children(dest); // TODO make this optional?
 		nih_info(_("set clone_children"));
-	} else if (strcmp(all_mounts[num_controllers].controller, "memory") == 0) {
+	} else if (strcmp(controller, "memory") == 0) {
 		set_use_hierarchy(dest);  // TODO make this optional?
 		nih_info(_("set memory.use_hierarchy"));
 	}
-	num_controllers++;
+
+	if (!set_release_agent(&all_mounts[i])) {
+		nih_error("failed to set release agent for %s",
+				all_mounts[i].controller);
+		return false;
+	}
 	return true;
 
 out:
 	return false;
 }
 
-/**
- * Mount the cgroup filesystems and record the information.
- * This should take configuration data from /etc.  For now,
- * Just mount all controllers, separately just as cgroup-lite
- * does, and set the use_hierarchy and clone_children options.
- *
- * Things which should go into configuration file:
- * . which controllers to mount
- * . which controllers to co-mount
- * . any mount options (per-controller)
- * . values for sane_behavior, use_hierarchy, and clone_children
- */
-int setup_cgroup_mounts(char *extra_mounts)
+int collect_subsystems(char *extra_mounts)
 {
 	FILE *cgf;
 	int ret;
 	char line[400];
 
-	if (unshare(CLONE_NEWNS) < 0) {
-		nih_fatal("Failed to unshare a private mount ns: %s", strerror(errno));
-		return -1;
-	}
-	if (!setup_base_path()) {
-		nih_fatal("Error setting up base cgroup path");
-		return -1;
-	}
-
 	if (extra_mounts) {
 		char *e;
 		for (e = strtok(extra_mounts, ","); e; e = strtok(NULL, ",")) {
-			if (!do_mount_subsys(e)) {
+			if (!save_mount_subsys(e)) {
 				nih_fatal("Error loading subsystem \"%s\"", e);
 				return -1;
 			}
@@ -261,19 +320,99 @@ int setup_cgroup_mounts(char *extra_mounts)
 #endif
 		}
 
-		if (!do_mount_subsys(line)) {
-			nih_fatal("Error mounting subsystem %s", line);
+		if (!save_mount_subsys(line)) {
+			nih_fatal("Error storing subsystem %s", line);
 			ret = -1;
 			goto out;
 		}
 	}
-	nih_info(_("mounted %d controllers"), num_controllers);
+	nih_info(_("found %d controllers"), num_controllers);
 	ret = 0;
 out:
 	fclose(cgf);
 	return ret;
 }
 
+/**
+ * Mount the cgroup filesystems and record the information.
+ * This should take configuration data from /etc.  For now,
+ * Just mount all controllers, separately just as cgroup-lite
+ * does, and set the use_hierarchy and clone_children options.
+ *
+ * Things which should go into configuration file:
+ * . which controllers to mount
+ * . which controllers to co-mount
+ * . any mount options (per-controller)
+ * . values for sane_behavior, use_hierarchy, and clone_children
+ */
+int setup_cgroup_mounts(void)
+{
+	int i;
+
+	if (unshare(CLONE_NEWNS) < 0) {
+		nih_fatal("Failed to unshare a private mount ns: %s", strerror(errno));
+		return -1;	/* caller tests for < 0; 0 would report success */
+	}
+	/* Mount every subsystem previously recorded in all_mounts[]. */
+	for (i=0; i<num_controllers; i++) {
+		if (!do_mount_subsys(i)) {
+			nih_fatal("Failed mounting cgroups");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * In the old release agent support, the release agent is not told the
+ * controller in which the cgroup was freed.  Therefore we need to have a
+ * different binary for each mounted controller.  We will create these under
+ * /run/cgmanager/agents/ as symlinks to /sbin/cgm-release-agent, i.e.
+ * /run/cgmanager/agents/cgm-release-agent.freezer.
+ */
+bool create_agent_symlinks(void)
+{
+	struct stat statbuf;
+	char buf[MAXPATHLEN];
+	int i, ret, plen;
+
+	ret = stat(AGENT, &statbuf);
+	if (ret < 0) {
+		nih_error("release agent not found");
+		return false;
+	}
+
+	plen = snprintf(buf, MAXPATHLEN, "%s/", AGENT_LINK_PATH);
+	if (plen < 0 || plen >= MAXPATHLEN) {
+		nih_error("memory error");
+		return false;
+	}
+
+	for (i=0; i<num_controllers; i++) {
+		ret = snprintf(buf+plen, MAXPATHLEN-plen, "cgm-release-agent.%s",
+				all_mounts[i].controller);
+		if (ret < 0 || ret >= MAXPATHLEN - plen) {	/* only MAXPATHLEN-plen bytes were available */
+			nih_error("path names too long");
+			return false;
+		}
+		nih_info("buf is %s", buf);
+		if (!file_exists(buf)) {
+			if (symlink(AGENT, buf) < 0) {
+				nih_error("failed to create release agent for %s",
+					all_mounts[i].controller);
+				return false;
+			}
+		}
+		if ((all_mounts[i].agent = strdup(buf)) == NULL) {
+			nih_error("out of memory");
+			return false;
+		}
+	}
+
+	return true;
+}
+
 static inline void drop_newlines(char *s)
 {
 	int l;
@@ -752,12 +891,16 @@ bool chmod_cgroup_path(const char *path, int mode)
 /*
  * TODO - make this more baroque to allow ranges etc
  */
-static char *set_value_blacklist[] = { "tasks", "release-agent", "cgroup.procs" };
+static char *set_value_blacklist[] = { "tasks",
+	"release_agent",	/* kernel file names use underscores */
+	"cgroup.procs",
+	"notify_on_release"
+};
 static size_t blacklist_len = sizeof(set_value_blacklist)/sizeof(char *);
 
-bool set_value(const char *path, const char *value)
+bool set_value_trusted(const char *path, const char *value)
 {
-	int i, len;
+	int len;
 	FILE *f;
 
 	nih_assert (path);
@@ -767,17 +910,6 @@ bool set_value(const char *path, const char *value)
 
 	len = strlen(value);
 
-	for (i = 0; i < blacklist_len; i++) {
-		const char *p = strrchr(path, '/');
-		if (p)
-			p++;
-		else
-			p = path;
-		if (strcmp(p, set_value_blacklist[i]) == 0) {
-			nih_error("attempted write to %s", set_value_blacklist[i]);
-			return false;
-		}
-	}
 	if ((f = fopen(path, "w")) == NULL) {
 		nih_error("Error opening %s for writing", path);
 		return false;
@@ -796,6 +928,26 @@ bool set_value(const char *path, const char *value)
 	}
 	return true;
 }
+bool set_value(const char *path, const char *value)
+{
+	int i;
+
+	nih_assert (path);
+
+	for (i = 0; i < blacklist_len; i++) {
+		const char *p = strrchr(path, '/');
+		if (p)
+			p++;
+		else
+			p = path;
+		if (strcmp(p, set_value_blacklist[i]) == 0) {
+			nih_error("attempted write to %s", set_value_blacklist[i]);
+			return false;
+		}
+	}
+
+	return set_value_trusted(path, value);
+}
 
 /*
  * Tiny helper to read the /proc/pid/ns/pid link for a given pid.
@@ -851,22 +1003,6 @@ bool realpath_escapes(char *path, char *safety)
 	return false;
 }
 
-bool file_exists(const char *path)
-{
-	struct stat sb;
-	if (stat(path, &sb) < 0)
-		return false;
-	return true;
-}
-
-bool dir_exists(const char *path)
-{
-	struct stat sb;
-	if (stat(path, &sb) < 0 || !S_ISDIR(sb.st_mode))
-		return false;
-	return true;
-}
-
 /*
  * move_self_to_root: called by cgmanager at startup to make sure
  * it starts in /
diff --git a/fs.h b/fs.h
index d595613..8e60ef3 100644
--- a/fs.h
+++ b/fs.h
@@ -24,7 +24,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-int setup_cgroup_mounts(char *extra_mounts);
+int collect_subsystems(char *extra_mounts);
+int setup_cgroup_mounts(void);
 bool compute_pid_cgroup(pid_t pid, const char *controller, const char *cgroup,
 		char *path, int *depth);
 int get_path_depth(const char *path);
@@ -38,6 +39,7 @@ bool hostuid_to_ns(uid_t uid, pid_t pid, uid_t *answer);
 bool chown_cgroup_path(const char *path, uid_t uid, gid_t gid, bool all_children);
 bool chmod_cgroup_path(const char *path, int mode);
 bool set_value(const char *path, const char *value);
+bool set_value_trusted(const char *path, const char *value);
 unsigned long read_pid_ns_link(int pid);
 unsigned long read_user_ns_link(int pid);
 bool realpath_escapes(char *path, char *safety);
@@ -45,3 +47,5 @@ bool file_exists(const char *path);
 bool dir_exists(const char *path);
 bool move_self_to_root(void);
 int get_child_directories(void *parent, const char *path, char ***output);
+bool setup_base_run_path(void);
+bool create_agent_symlinks(void);
diff --git a/org.linuxcontainers.cgmanager.xml b/org.linuxcontainers.cgmanager.xml
index 619125b..d20af88 100644
--- a/org.linuxcontainers.cgmanager.xml
+++ b/org.linuxcontainers.cgmanager.xml
@@ -171,8 +171,16 @@
       <arg name="cgroup" type="s" direction="in" />
       <arg name="output" type="as" direction="out" />
     </method>
+    <method name="RemoveOnEmptyScm">
+      <arg name="controller" type="s" direction="in" />
+      <arg name="cgroup" type="s" direction="in" />
+      <arg name="sockfd" type="h" direction="in" />
+    </method>
+    <method name="RemoveOnEmpty">
+      <arg name="controller" type="s" direction="in" />
+      <arg name="cgroup" type="s" direction="in" />
+    </method>
     <!-- still to add: low priority,
-	 Prune (remove all empty decendents)
 	 removeWhenEmpty
 	 getEventfd
 	 -->
diff --git a/tests/test21.sh b/tests/test21.sh
new file mode 100755
index 0000000..9c11172
--- /dev/null
+++ b/tests/test21.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+echo "Test 21: remove_on_empty"
+
+cg="test21_cg"
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Remove string:'memory' string:$cg || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Remove string:'devices' string:$cg || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Create string:'memory' string:$cg || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Create string:'devices' string:$cg || true
+sleep 200 &
+pid=$!
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.MovePid string:'memory' string:$cg int32:$pid
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.MovePid string:'devices' string:$cg int32:$pid
+
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:'memory' string:$cg
+
+kill $pid; wait $pid 2>/dev/null; sleep 1	# let the task exit and the release agent run before checking
+
+# now $cg should be deleted in memory, but not in devices
+# note if logind or upstart has set this for us then this will raise a false positive
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.GetTasks string:'devices' string:$cg >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+	echo "Remove-on-empty affected another cgroup"
+	exit 1
+fi
+
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.GetTasks string:'memory' string:$cg >/dev/null 2>&1
+if [ $? -eq 0 ]; then
+	echo "Failed to remove-on-empty"
+	exit 1
+fi
+
+echo "Test 21 (remove_on_empty) passed"
+exit 0
-- 
1.9.1



More information about the cgmanager-devel mailing list