[cgmanager-devel] [PATCH 1/1] RFC: implement release-on-empty

Serge Hallyn serge.hallyn at ubuntu.com
Sat Mar 29 00:04:02 UTC 2014


As the new kernel feature for release-on-empty is not yet complete,
this uses the old notify-on-release hook feature.  It creates a
/sbin/cgm-release-agent program.  When cgmanager starts up, it
creates symlinks from that program to 'cgm-release-agent.$controller'
for each controller under /run/cgmanager/agents.  This way it
can tell by the invoked program name which controller the to-be-removed
cgroup belongs to.

As a part of this the mounting of subsystems is broken up into
collecting the subsystems, then setting release agents, and
finally mounting the subsystems into our private namespace.  The
original reason for this turned out not to be valid, but the resulting
code is cleaner so I'm leaving it.

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 .gitignore                        |   1 +
 Makefile.am                       |   8 +-
 acinclude.m4                      | 137 +++++++++++++++++++
 cgm                               |  15 ++
 cgm-release-agent.c               |  92 +++++++++++++
 cgmanager-proxy.c                 |  51 +++++++
 cgmanager.c                       |  70 +++++++++-
 configure.ac                      |   2 +
 frontend.c                        |  89 ++++++++++++
 frontend.h                        |   5 +
 fs.c                              | 278 ++++++++++++++++++++++++++++----------
 fs.h                              |   6 +-
 org.linuxcontainers.cgmanager.xml |  10 +-
 tests/test21.sh                   |  34 +++++
 14 files changed, 722 insertions(+), 76 deletions(-)
 create mode 100644 acinclude.m4
 create mode 100644 cgm-release-agent.c
 create mode 100755 tests/test21.sh

diff --git a/.gitignore b/.gitignore
index 0a408cd..7decb58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,4 @@ cgm.1
 tests/*.o
 tests/cgm-concurrent
 00*
+cgm-release-agent
diff --git a/Makefile.am b/Makefile.am
index 5d62ada..8023250 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -7,7 +7,7 @@ VERSION_AGE      = 0
 LIBCGMANAGER_VERSION        = $(VERSION_CURRENT):$(VERSION_REVISION):$(VERSION_AGE)
 LIBCGMANAGER_VERSION_DOTTED = $(VERSION_CURRENT).$(VERSION_REVISION).$(VERSION_AGE)
 
-AM_CFLAGS = -Wall -ggdb -D_GNU_SOURCE
+AM_CFLAGS = -Wall -ggdb -D_GNU_SOURCE -DSBINDIR=\"$(SBINDIR)\"
 AM_CFLAGS += $(DBUS_CFLAGS) $(NIH_CFLAGS) $(NIH_DBUS_CFLAGS)
 AM_LDFLAGS = $(DBUS_LIBS) $(NIH_LIBS) $(NIH_DBUS_LIBS)
 client_files_OUTPUTS = cgmanager-client.c cgmanager-client.h
@@ -19,10 +19,11 @@ CLEANFILES = \
 	$(manager_files_OUTPUTS) \
 	$(client_files_OUTPUTS) \
 	cgmanager cgproxy \
+	cgm-release-agent  \
 	*.o *.so \
 	libcgmanager.pc.in
 
-sbin_PROGRAMS = cgmanager cgproxy
+sbin_PROGRAMS = cgmanager cgproxy cgm-release-agent
 
 bin_SCRIPTS = cgm
 
@@ -72,6 +73,9 @@ cgproxy_SOURCES = cgmanager-proxy.c \
 	fs.c fs.h cgmanager.h \
 	frontend.c frontend.h
 
+cgm_release_agent_SOURCES = cgm-release-agent.c
+cgm_release_agent_LDADD = -lcgmanager
+
 libcgmanager_la_SOURCES = \
 	$(client_files_OUTPUTS) \
 	org.linuxcontainers.cgmanager.xml
diff --git a/acinclude.m4 b/acinclude.m4
new file mode 100644
index 0000000..d718b5e
--- /dev/null
+++ b/acinclude.m4
@@ -0,0 +1,137 @@
+dnl as-ac-expand.m4 0.2.0
+dnl autostars m4 macro for expanding directories using configure's prefix
+dnl thomas at apestaart.org
+dnl
+
+dnl AS_AC_EXPAND(VAR, CONFIGURE_VAR)
+dnl example
+dnl AS_AC_EXPAND(SYSCONFDIR, $sysconfdir)
+dnl will set SYSCONFDIR to /usr/local/etc if prefix=/usr/local
+
+AC_DEFUN([AS_AC_EXPAND],
+[
+    EXP_VAR=[$1]
+    FROM_VAR=[$2]
+
+    dnl first expand prefix and exec_prefix if necessary
+    prefix_save=$prefix
+    exec_prefix_save=$exec_prefix
+
+    dnl if no prefix given, then use /usr/local, the default prefix
+    if test "x$prefix" = "xNONE"; then
+        prefix="$ac_default_prefix"
+    fi
+    dnl if no exec_prefix given, then use prefix
+    if test "x$exec_prefix" = "xNONE"; then
+        exec_prefix=$prefix
+    fi
+
+    full_var="$FROM_VAR"
+    dnl loop until it doesn't change anymore
+    while true; do
+        new_full_var="`eval echo $full_var`"
+        if test "x$new_full_var" = "x$full_var"; then break; fi
+        full_var=$new_full_var
+    done
+
+    dnl clean up
+    full_var=$new_full_var
+    AC_SUBST([$1], "$full_var")
+
+    dnl restore prefix and exec_prefix
+    prefix=$prefix_save
+    exec_prefix=$exec_prefix_save
+])
+
+dnl Available from the GNU Autoconf Macro Archive at:
+dnl http://www.gnu.org/software/ac-archive/htmldoc/ax_compare_version.html
+AC_DEFUN([AX_COMPARE_VERSION], [
+# Used to indicate true or false condition
+ax_compare_version=false
+	  # Convert the two version strings to be compared into a format that
+  # allows a simple string comparison.  The end result is that a version
+  # string of the form 1.12.5-r617 will be converted to the form
+  # 0001001200050617.  In other words, each number is zero padded to four
+  # digits, and non digits are removed.
+  AS_VAR_PUSHDEF([A],[ax_compare_version_A])
+  A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
+                     -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/[[^0-9]]//g'`
+
+  AS_VAR_PUSHDEF([B],[ax_compare_version_B])
+  B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
+                     -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/[[^0-9]]//g'`
+
+  dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary
+  dnl # then the first line is used to determine if the condition is true.
+  dnl # The sed right after the echo is to remove any indented white space.
+  m4_case(m4_tolower($2),
+  [lt],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"`
+  ],
+  [gt],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"`
+  ],
+  [le],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"`
+  ],
+  [ge],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"`
+  ],[
+    dnl Split the operator from the subversion count if present.
+    m4_bmatch(m4_substr($2,2),
+    [0],[
+      # A count of zero means use the length of the shorter version.
+      # Determine the number of characters in A and B.
+      ax_compare_version_len_A=`echo "$A" | awk '{print(length)}'`
+      ax_compare_version_len_B=`echo "$B" | awk '{print(length)}'`
+
+      # Set A to no more than B's length and B to no more than A's length.
+      A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"`
+      B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"`
+    ],
+    [[0-9]+],[
+      # A count greater than zero means use only that many subversions
+      A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
+      B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
+    ],
+    [.+],[
+      AC_WARNING(
+        [illegal OP numeric parameter: $2])
+    ],[])
+
+    # Pad zeros at end of numbers to make same length.
+    ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`"
+    B="$B`echo $A | sed 's/./0/g'`"
+    A="$ax_compare_version_tmp_A"
+
+    # Check for equality or inequality as necessary.
+    m4_case(m4_tolower(m4_substr($2,0,2)),
+    [eq],[
+      test "x$A" = "x$B" && ax_compare_version=true
+    ],
+    [ne],[
+      test "x$A" != "x$B" && ax_compare_version=true
+    ],[
+      AC_WARNING([illegal OP parameter: $2])
+    ])
+  ])
+
+  AS_VAR_POPDEF([A])dnl
+  AS_VAR_POPDEF([B])dnl
+
+  dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE.
+  if test "$ax_compare_version" = "true" ; then
+    m4_ifvaln([$4],[$4],[:])dnl
+    m4_ifvaln([$5],[else $5])dnl
+  fi
+]) dnl AX_COMPARE_VERSION
diff --git a/cgm b/cgm
index 423cbc5..71e80cd 100755
--- a/cgm
+++ b/cgm
@@ -45,6 +45,8 @@ usage() {
 	echo
 	echo "   $me listchildren <controller> <cgroup>"
 	echo
+	echo "   $me removeonempty <controller> <cgroup>"
+	echo
 	echo "   $me apiversion"
 	echo ""
 	echo " Replace '<controller>' with the desired controller, i.e."
@@ -168,6 +170,19 @@ case "$1" in
 		dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.ListChildren string:$2 string:$3
 		exit $?
 	;;
+	removeonempty)
+		if [ $# -lt 3 ]; then
+			usage $0
+		fi
+		if [ "$2" = "all" ]; then
+			for cg in `awk '!/^#/ { print $1 }' /proc/cgroups`; do
+				dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:$cg string:$3
+			done
+		else
+			dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:$2 string:$3
+		fi
+		exit $?
+	;;
 	apiversion)
 		dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock /org/linuxcontainers/cgmanager org.freedesktop.DBus.Properties.Get string:'org.linuxcontainers.cgmanager0_0' string:'api_version'
 		exit $?
diff --git a/cgm-release-agent.c b/cgm-release-agent.c
new file mode 100644
index 0000000..b834465
--- /dev/null
+++ b/cgm-release-agent.c
@@ -0,0 +1,92 @@
+/* cgmanager
+ *
+ * Copyright © 2014 Canonical
+ * Author: Serge Hallyn <serge.hallyn at ubuntu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "cgmanager.h"
+#include "cgmanager-client.h"
+
+#include <nih-dbus/dbus_connection.h>
+#include "cgmanager-client.h"
+#include <nih/alloc.h>
+#include <nih/error.h>
+#include <nih/logging.h>
+#include <nih/string.h>
+
+#define CG_REMOVE_RECURSIVE 1
+
+/* Ask cgmanager to recursively remove @cgroup in @controller; 0 on success, -1 on error. */
+int do_remove_cgroup(const char *controller, const char *cgroup)
+{
+	DBusError dbus_error;
+	DBusConnection *connection;
+	NihDBusProxy *cgroup_manager = NULL;
+	NihError *nerr;
+	int existed, ret = 0;
+	dbus_error_init(&dbus_error);
+	connection = dbus_connection_open_private(CGMANAGER_DBUS_PATH, &dbus_error);
+	if (!connection) {
+		nih_error("Failed opening dbus connection: %s: %s",
+				dbus_error.name, dbus_error.message);
+		dbus_error_free(&dbus_error);
+		return -1;
+	}
+	dbus_error_free(&dbus_error);
+	if (nih_dbus_setup(connection, NULL) < 0) {
+		nerr = nih_error_get();
+		nih_error("Failed setting up dbus connection: %s", nerr->message);
+		nih_free(nerr);
+		dbus_connection_unref(connection);
+		return -1;
+	}
+	cgroup_manager = nih_dbus_proxy_new(NULL, connection,
+				NULL /* p2p */,
+				"/org/linuxcontainers/cgmanager", NULL, NULL);
+	dbus_connection_unref(connection);
+	if (!cgroup_manager) {
+		nerr = nih_error_get();
+		nih_error("Failed creating cgmanager proxy: %s", nerr->message);
+		nih_free(nerr);
+		return -1;
+	}
+	if (cgmanager_remove_sync(NULL, cgroup_manager, controller,
+				  cgroup, CG_REMOVE_RECURSIVE, &existed) != 0) {
+		nerr = nih_error_get();
+		/* report the failure instead of dropping it, and tell the caller */
+		nih_error("Failed removing %s:%s: %s", controller, cgroup, nerr->message);
+		nih_free(nerr);
+		ret = -1;
+	}
+	nih_free(cgroup_manager);
+	return ret;
+}
+
+/* argv[0] is "<dir>/cgm-release-agent.<controller>"; argv[1] is the cgroup to remove. */
+int main(int argc, char *argv[])
+{
+	char *p;
+
+	nih_assert (argv[1] != NULL);
+	/* skip directories so a '.' in the path is not mistaken for the separator */
+	p = strrchr(argv[0], '/');
+	p = strchr(p ? p : argv[0], '.');
+	if (!p)
+		return -1;
+	return do_remove_cgroup(p+1, argv[1]);
+}
diff --git a/cgmanager-proxy.c b/cgmanager-proxy.c
index d6e2113..49f6343 100644
--- a/cgmanager-proxy.c
+++ b/cgmanager-proxy.c
@@ -837,6 +837,57 @@ out:
 	return ret;
 }
 
+int remove_on_empty_main (const char *controller, const char *cgroup,
+		struct ucred p, struct ucred r)	/* p: proxy credentials, r: requestor credentials */
+{
+	DBusMessage *message;
+	DBusMessageIter iter;
+	int sv[2], ret = -1;	/* ret stays -1 unless the manager answers '1' */
+	char buf[1];
+
+	if (memcmp(&p, &r, sizeof(struct ucred)) != 0) {	/* only forwarded when both credentials are identical */
+		nih_error("%s: proxy != requestor", __func__);
+		return -1;
+	}
+
+	if (!sane_cgroup(cgroup)) {
+		nih_error("%s: unsafe cgroup", __func__);
+		return -1;
+	}
+
+	if (!(message = start_dbus_request("RemoveOnEmptyScm", sv))) {	/* NOTE(review): presumably this also opens the sv socket pair - confirm */
+		nih_error("%s: error starting dbus request", __func__);
+		return -1;
+	}
+
+	dbus_message_iter_init_append(message, &iter);	/* arguments follow the RemoveOnEmptyScm signature: controller, cgroup, sockfd */
+	if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_STRING, &controller)) {
+		nih_error("%s: out of memory", __func__);
+		goto out;
+	}
+	if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_STRING, &cgroup)) {
+		nih_error("%s: out of memory", __func__);
+		goto out;
+	}
+	if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_UNIX_FD, &sv[1])) {
+		nih_error("%s: out of memory", __func__);
+		goto out;
+	}
+
+	if (!complete_dbus_request(message, sv, &r, NULL)) {
+		nih_error("%s: error completing dbus request", __func__);
+		goto out;
+	}
+
+	if (recv(sv[0], buf, 1, 0) == 1 && (*buf == '1'))	/* a single '1' byte signals success */
+		ret = 0;
+out:
+	close(sv[0]);
+	close(sv[1]);
+	return ret;
+}
+
+
 /**
  * options:
  *
diff --git a/cgmanager.c b/cgmanager.c
index 520e3b0..a532f47 100644
--- a/cgmanager.c
+++ b/cgmanager.c
@@ -666,6 +666,58 @@ int list_children_main(void *parent, const char *controller, const char *cgroup,
 	return get_child_directories(parent, path, output);
 }
 
+int remove_on_empty_main(const char *controller, const char *cgroup,
+		struct ucred p, struct ucred r)	/* p is not consulted here; returns 0 on success, -1 on failure */
+{
+	char rcgpath[MAXPATHLEN];
+	size_t cgroup_len;
+	nih_local char *working = NULL, *wcgroup = NULL;
+
+	if (!sane_cgroup(cgroup)) {
+		nih_error("%s: unsafe cgroup", __func__);
+		return -1;
+	}
+
+	// Resolve the current cgroup of the requestor (r.pid) for @controller into rcgpath
+	if (!compute_pid_cgroup(r.pid, controller, "", rcgpath, NULL)) {
+		nih_error("%s: Could not determine the requested cgroup", __func__);
+		return -1;
+	}
+
+	cgroup_len = strlen(cgroup);
+
+	if (strlen(rcgpath) + cgroup_len > MAXPATHLEN) {
+		nih_error("%s: Path name too long", __func__);
+		return -1;
+	}
+
+	wcgroup = NIH_MUST( nih_strdup(NULL, cgroup) );
+	if (!normalize_path(wcgroup))
+		return -1;
+
+	working = NIH_MUST( nih_strdup(NULL, rcgpath) );	/* working = rcgpath + "/" + normalized cgroup */
+	NIH_MUST( nih_strcat(&working, NULL, "/") );
+	NIH_MUST( nih_strcat(&working, NULL, wcgroup) );
+
+	if (!dir_exists(working)) {
+		return -1;
+	}
+	// the requestor must have write access to the target cgroup directory
+	if (!may_access(r.pid, r.uid, r.gid, working, O_WRONLY)) {
+		nih_error("%s: pid %d (%u:%u) may not remove %s", __func__,
+			r.pid, r.uid, r.gid, working);
+		return -1;
+	}
+
+	NIH_MUST( nih_strcat(&working, NULL, "/notify_on_release") );	/* working now names the flag file inside that cgroup */
+
+	if (!set_value_trusted(working, "1\n")) {	/* set_value_trusted() skips the blacklist check that set_value() performs */
+		nih_error("Failed to set remove_on_empty for %s:%s", controller, working);
+		return -1;
+	}
+
+	return 0;
+}
 char *extra_cgroup_mounts;
 
 static int
@@ -783,7 +835,23 @@ main (int argc, char *argv[])
 				  client_disconnect);
 	nih_assert (server != NULL);
 
-	if (setup_cgroup_mounts(extra_cgroup_mounts) < 0) {
+	if (!setup_base_run_path()) {
+		nih_fatal("Error setting up base cgroup path");
+		return -1;
+	}
+
+	if (collect_subsystems(extra_cgroup_mounts) < 0)
+	{
+		nih_fatal("failed to collect cgroup subsystems");
+		exit(1);
+	}
+
+	if (!create_agent_symlinks()) {
+		nih_fatal("Error creating release agent symlinks");
+		exit(1);
+	}
+
+	if (setup_cgroup_mounts() < 0) {
 		nih_fatal ("Failed to set up cgroup mounts");
 		exit(1);
 	}
diff --git a/configure.ac b/configure.ac
index ca458d1..8ddfa52 100644
--- a/configure.ac
+++ b/configure.ac
@@ -22,4 +22,6 @@ PKG_CHECK_MODULES([DBUS], [dbus-1 >= 1.2.16])
 
 AC_PATH_PROG(HELP2MAN, help2man, false // No help2man //)
 
+AS_AC_EXPAND(SBINDIR, "$sbindir")
+
 AC_OUTPUT
diff --git a/frontend.c b/frontend.c
index 42b913e..1b04fe8 100644
--- a/frontend.c
+++ b/frontend.c
@@ -76,6 +76,7 @@ static struct scm_sock_data *alloc_scm_sock_data(NihDBusMessage *message,
 	return d;
 }
 
+#if 0
 static const char *req_type_to_str(enum req_type r)
 {
 	switch(r) {
@@ -90,9 +91,11 @@ static const char *req_type_to_str(enum req_type r)
 		case REQ_TYPE_GET_TASKS: return "get_tasks";
 		case REQ_TYPE_CHMOD: return "chmod";
 		case REQ_TYPE_LIST_CHILDREN: return "list_children";
+		case REQ_TYPE_REMOVE_ON_EMPTY: return "remove_on_empty";
 		default: return "invalid";
 	}
 }
+#endif
 
 /*
  * All Scm-enhanced transactions take at least one SCM cred,
@@ -188,6 +191,7 @@ static void sock_scm_reader(struct scm_sock_data *data,
 	case REQ_TYPE_REMOVE: remove_scm_complete(data); break;
 	case REQ_TYPE_GET_TASKS: get_tasks_scm_complete(data); break;
 	case REQ_TYPE_LIST_CHILDREN: list_children_scm_complete(data); break;
+	case REQ_TYPE_REMOVE_ON_EMPTY: remove_on_empty_scm_complete(data); break;
 	default:
 		nih_fatal("%s: bad req_type %d", __func__, data->type);
 		exit(1);
@@ -1304,6 +1308,91 @@ int cgmanager_list_children (void *data, NihDBusMessage *message,
 	return ret;
 }
 
+/* Run the queued RemoveOnEmpty request and send one status byte back on data->fd. */
+void remove_on_empty_scm_complete(struct scm_sock_data *data)
+{
+	int rc = remove_on_empty_main(data->controller, data->cgroup,
+				data->pcred, data->rcred);
+	char status = (rc == 0) ? '1' : '0';	/* '1' = success, '0' = failure */
+
+	if (write(data->fd, &status, 1) < 0)
+		nih_error("RemoveOnEmptyScm: Error writing final result to client");
+}
+
+int cgmanager_remove_on_empty_scm (void *data, NihDBusMessage *message,
+		 const char *controller, const char *cgroup, int sockfd)	/* returns 0 once the request is queued, -1 on error */
+{
+	struct scm_sock_data *d;
+
+	d = alloc_scm_sock_data(message, sockfd, REQ_TYPE_REMOVE_ON_EMPTY);
+	if (!d)
+		return -1;
+	d->controller = NIH_MUST( nih_strdup(d, controller) );	/* copies are allocated with d as their nih parent */
+	d->cgroup = NIH_MUST( nih_strdup(d, cgroup) );
+
+	if (!nih_io_reopen(NULL, sockfd, NIH_IO_MESSAGE,	/* sock_scm_reader dispatches to remove_on_empty_scm_complete() for this request type */
+				(NihIoReader) sock_scm_reader,
+				(NihIoCloseHandler) scm_sock_close,
+				scm_sock_error_handler, d)) {
+		NihError *error = nih_error_steal ();
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"Failed queue scm message: %s", error->message);
+		nih_free(error);
+		return -1;
+	}
+	if (!kick_fd_client(sockfd)) {	/* NOTE(review): presumably prompts the client to start sending its credentials - confirm */
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"Error writing to client: %s", strerror(errno));
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * This is one of the dbus callbacks.  Caller requests that cgroup @cgroup
+ * in controller @controller be marked to be removed when it becomes empty,
+ * meaning there are no more sub-cgroups and no tasks.  The peer's SO_PEERCRED
+ * credentials act as both proxy and requestor.  Returns 0, or -1 on error.
+ */
+int cgmanager_remove_on_empty (void *data, NihDBusMessage *message,
+		const char *controller, const char *cgroup)
+{
+	int fd = 0, ret;
+	struct ucred rcred;
+	socklen_t len;
+
+	if (message == NULL) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"message was null");
+		return -1;
+	}
+
+	if (!dbus_connection_get_socket(message->connection, &fd)) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+					     "Could not get client socket.");
+		return -1;
+	}
+
+	len = sizeof(struct ucred);
+	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &rcred, &len) < 0) {	/* pid/uid/gid of the process on the other end of the socket */
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+					     "Could not get peer cred: %s",
+					     strerror(errno));
+		return -1;
+	}
+
+	nih_info (_("RemoveOnEmpty: Client fd is: %d (pid=%d, uid=%u, gid=%u)"),
+			fd, rcred.pid, rcred.uid, rcred.gid);
+
+	ret = remove_on_empty_main(controller, cgroup, rcred, rcred);	/* same credentials passed for both p and r */
+	if (ret >= 0)
+		ret = 0;
+	else
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+					     "invalid request");
+	return ret;
+}
+
 int
 cgmanager_get_api_version(void *data, NihDBusMessage *message, int *version)
 {
diff --git a/frontend.h b/frontend.h
index 935231f..0c6c1f6 100644
--- a/frontend.h
+++ b/frontend.h
@@ -103,6 +103,7 @@ enum req_type {
 	REQ_TYPE_CHMOD,
 	REQ_TYPE_MOVE_PID_ABS,
 	REQ_TYPE_LIST_CHILDREN,
+	REQ_TYPE_REMOVE_ON_EMPTY,
 };
 
 int get_pid_cgroup_main(void *parent, const char *controller,
@@ -141,6 +142,10 @@ int list_children_main (void *parent, const char *controller, const char *cgroup
 		struct ucred p, struct ucred r, char ***output);
 void list_children_scm_complete(struct scm_sock_data *data);
 
+int remove_on_empty_main (const char *controller, const char *cgroup,
+		struct ucred p, struct ucred r);
+void remove_on_empty_scm_complete(struct scm_sock_data *data);
+
 int cgmanager_ping (void *data, NihDBusMessage *message, int junk);
 
 int client_connect (DBusServer *server, DBusConnection *conn);
diff --git a/fs.c b/fs.c
index e4bccbd..c245964 100644
--- a/fs.c
+++ b/fs.c
@@ -49,16 +49,39 @@
 #include <nih-dbus/dbus_connection.h>
 #include <nih-dbus/dbus_proxy.h>
 
+/* defines relating to the release agent */
+#define AGENT SBINDIR "/cgm-release-agent"
+#define AGENT_LINK_PATH "/run/cgmanager/agents"
+
 struct controller_mounts {
 	char *controller;
 	char *options;
 	char *path;
+	char *src;
+	char *agent;
 };
 
 static struct controller_mounts *all_mounts;
 static int num_controllers;
 
 static char *base_path;
+
+/* True when stat(2) succeeds on @path, whatever kind of file it names. */
+bool file_exists(const char *path)
+{
+	struct stat st;
+
+	return stat(path, &st) == 0;
+}
+
+/* True only when @path can be stat(2)ed and names a directory. */
+bool dir_exists(const char *path)
+{
+	struct stat st;
+
+	return stat(path, &st) == 0 && S_ISDIR(st.st_mode);
+}
+
 /*
  * Where do we want to mount the controllers?  We used to mount
  * them under a tmpfs under /sys/fs/cgroup, for all to share.  Now
@@ -66,7 +89,7 @@ static char *base_path;
  * TODO read this from configuration file too
  * TODO do we want to create these in a tmpfs?
  */
-static bool setup_base_path(void)
+bool setup_base_run_path(void)
 {
 	base_path = strdup("/run/cgmanager/fs");
 	if (!base_path) {
@@ -85,6 +108,10 @@ static bool setup_base_path(void)
 		nih_fatal("%s: failed to create /run/cgmanager/fs", __func__);
 		return false;
 	}
+	if (mkdir(AGENT_LINK_PATH, 0755) < 0 && errno != EEXIST) {
+		nih_fatal("%s: failed to create %s", __func__, AGENT_LINK_PATH);
+		return false;
+	}
 	return true;
 }
 
@@ -128,7 +155,7 @@ static void set_use_hierarchy(const char *path)
 	fclose(f);
 }
 
-static bool do_mount_subsys(char *s)
+static bool save_mount_subsys(char *s)
 {
 	struct controller_mounts *tmp;
 	char *src, dest[MAXPATHLEN], *controller;
@@ -157,15 +184,6 @@ static bool do_mount_subsys(char *s)
 		ret = -1;
 		goto out;
 	}
-	if (mkdir(dest, 0755) < 0 && errno != EEXIST) {
-		nih_fatal("Failed to create %s: %s", dest, strerror(errno));
-		ret = -1;
-		goto out;
-	}
-	if ((ret = mount(src, dest, "cgroup", 0, src)) < 0) {
-		nih_fatal("Failed mounting %s: %s", s, strerror(errno));
-		goto out;
-	}
 	ret = -1;
 	tmp = realloc(all_mounts, (num_controllers+1) * sizeof(*all_mounts));
 	if (!tmp) {
@@ -180,58 +198,99 @@ static bool do_mount_subsys(char *s)
 	}
 	all_mounts[num_controllers].options = NULL;
 	all_mounts[num_controllers].path = strdup(dest);
-	if (!all_mounts[num_controllers].path) {
+	all_mounts[num_controllers].src = strdup(src);
+	if (!all_mounts[num_controllers].path ||
+			!all_mounts[num_controllers].src) {
 		nih_fatal("Out of memory mounting controllers");
 		goto out;
 	}
 	nih_info(_("Mounted %s onto %s"),
 			all_mounts[num_controllers].controller,
 			all_mounts[num_controllers].path);
-	if (strcmp(all_mounts[num_controllers].controller, "cpuset") == 0) {
+	num_controllers++;
+	return true;
+
+out:
+	return false;
+}
+
+/* Register the per-controller release agent for the hierarchy mounted at
+ * m->path by writing m->agent into <m->path>/release_agent.  False on error. */
+static bool set_release_agent(struct controller_mounts *m)
+{
+	FILE *f;
+	char path[MAXPATHLEN];
+	int ret;
+
+	ret = snprintf(path, MAXPATHLEN, "%s/release_agent", m->path);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		nih_error("release_agent path too long for %s", m->controller);
+		return false;
+	}
+	if ((f = fopen(path, "w")) == NULL) {
+		nih_error("failed to open %s for writing: %s", path, strerror(errno));
+		return false;
+	}
+
+	/* stdio buffers the write, so a rejected value may only surface at fclose() */
+	ret = fprintf(f, "%s\n", m->agent);
+	if (fclose(f) != 0 || ret < 0) {
+		nih_error("failed to set release agent for %s",
+				m->controller);
+		return false;
+	}
+
+	return true;
+}
+
+static bool do_mount_subsys(int i)
+{
+	char *src, *dest, *controller;
+	int ret;
+
+	dest = all_mounts[i].path;
+	controller = all_mounts[i].controller;
+	src = all_mounts[i].src;
+
+	if (mkdir(dest, 0755) < 0 && errno != EEXIST) {
+		nih_fatal("Failed to create %s: %s", dest, strerror(errno));
+		ret = -1;
+		goto out;
+	}
+	if ((ret = mount(src, dest, "cgroup", 0, src)) < 0) {
+		nih_fatal("Failed mounting %s: %s", dest, strerror(errno));
+		goto out;
+	}
+	nih_info(_("Mounted %s onto %s"), controller, dest);
+	if (strcmp(controller, "cpuset") == 0) {
 		set_clone_children(dest); // TODO make this optional?
 		nih_info(_("set clone_children"));
-	} else if (strcmp(all_mounts[num_controllers].controller, "memory") == 0) {
+	} else if (strcmp(controller, "memory") == 0) {
 		set_use_hierarchy(dest);  // TODO make this optional?
 		nih_info(_("set memory.use_hierarchy"));
 	}
-	num_controllers++;
+
+	if (!set_release_agent(&all_mounts[i])) {
+		nih_error("failed to set release agent for %s",
+				all_mounts[i].controller);
+		return false;
+	}
 	return true;
 
 out:
 	return false;
 }
 
-/**
- * Mount the cgroup filesystems and record the information.
- * This should take configuration data from /etc.  For now,
- * Just mount all controllers, separately just as cgroup-lite
- * does, and set the use_hierarchy and clone_children options.
- *
- * Things which should go into configuration file:
- * . which controllers to mount
- * . which controllers to co-mount
- * . any mount options (per-controller)
- * . values for sane_behavior, use_hierarchy, and clone_children
- */
-int setup_cgroup_mounts(char *extra_mounts)
+int collect_subsystems(char *extra_mounts)
 {
 	FILE *cgf;
 	int ret;
 	char line[400];
 
-	if (unshare(CLONE_NEWNS) < 0) {
-		nih_fatal("Failed to unshare a private mount ns: %s", strerror(errno));
-		return -1;
-	}
-	if (!setup_base_path()) {
-		nih_fatal("Error setting up base cgroup path");
-		return -1;
-	}
-
 	if (extra_mounts) {
 		char *e;
 		for (e = strtok(extra_mounts, ","); e; e = strtok(NULL, ",")) {
-			if (!do_mount_subsys(e)) {
+			if (!save_mount_subsys(e)) {
 				nih_fatal("Error loading subsystem \"%s\"", e);
 				return -1;
 			}
@@ -261,19 +320,99 @@ int setup_cgroup_mounts(char *extra_mounts)
 #endif
 		}
 
-		if (!do_mount_subsys(line)) {
-			nih_fatal("Error mounting subsystem %s", line);
+		if (!save_mount_subsys(line)) {
+			nih_fatal("Error storing subsystem %s", line);
 			ret = -1;
 			goto out;
 		}
 	}
-	nih_info(_("mounted %d controllers"), num_controllers);
+	nih_info(_("found %d controllers"), num_controllers);
 	ret = 0;
 out:
 	fclose(cgf);
 	return ret;
 }
 
+/**
+ * Mount the cgroup filesystems previously recorded in all_mounts, inside
+ * a private mount namespace.  This should take configuration data from
+ * /etc.  For now, just mount all controllers, separately just as cgroup-lite
+ * does.  Returns 0 on success, -1 on any failure.
+ *
+ * Things which should go into configuration file:
+ * . which controllers to mount
+ * . which controllers to co-mount
+ * . any mount options (per-controller)
+ * . values for sane_behavior, use_hierarchy, and clone_children
+ */
+int setup_cgroup_mounts(void)
+{
+	int i;
+
+	if (unshare(CLONE_NEWNS) < 0) {
+		nih_fatal("Failed to unshare a private mount ns: %s", strerror(errno));
+		return -1;
+	}
+
+	for (i=0; i<num_controllers; i++) {
+		if (!do_mount_subsys(i)) {
+			nih_fatal("Failed mounting cgroups");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * In the old release agent support, the release agent is not told the
+ * controller in which the cgroup was freed.  Therefore we need to have a
+ * different binary for each mounted controller.  We create these under
+ * AGENT_LINK_PATH as symlinks to AGENT, e.g.
+ * /run/cgmanager/agents/cgm-release-agent.freezer.  Returns false on error.
+ */
+bool create_agent_symlinks(void)
+{
+	struct stat statbuf;
+	char buf[MAXPATHLEN];
+	int i, ret, plen;
+
+	ret = stat(AGENT, &statbuf);
+	if (ret < 0) {
+		nih_error("release agent not found");
+		return false;
+	}
+
+	plen = snprintf(buf, MAXPATHLEN, "%s/", AGENT_LINK_PATH);
+	if (plen < 0 || plen >= MAXPATHLEN) {
+		nih_error("memory error");
+		return false;
+	}
+
+	for (i=0; i<num_controllers; i++) {
+		ret = snprintf(buf+plen, MAXPATHLEN-plen, "cgm-release-agent.%s",
+				all_mounts[i].controller);
+		if (ret < 0 || ret >= MAXPATHLEN-plen) { /* only MAXPATHLEN-plen bytes were offered */
+			nih_error("path names too long");
+			return false;
+		}
+		nih_info("buf is %s", buf);
+		if (!file_exists(buf)) {
+			if (symlink(AGENT, buf) < 0) {
+				nih_error("failed to create release agent for %s",
+					all_mounts[i].controller);
+				return false;
+			}
+		}
+		if ((all_mounts[i].agent = strdup(buf)) == NULL) { /* saved for set_release_agent() */
+			nih_error("out of memory");
+			return false;
+		}
+	}
+
+	return true;
+}
+
 static inline void drop_newlines(char *s)
 {
 	int l;
@@ -752,12 +891,16 @@ bool chmod_cgroup_path(const char *path, int mode)
 /*
  * TODO - make this more baroque to allow ranges etc
  */
-static char *set_value_blacklist[] = { "tasks", "release-agent", "cgroup.procs" };
+/* basenames set_value() refuses to write; spelled as the cgroup files are named */
+static char *set_value_blacklist[] = { "tasks",
+	"release_agent", "cgroup.procs",
+	"notify_on_release"
+};
 static size_t blacklist_len = sizeof(set_value_blacklist)/sizeof(char *);
 
-bool set_value(const char *path, const char *value)
+bool set_value_trusted(const char *path, const char *value)
 {
-	int i, len;
+	int len;
 	FILE *f;
 
 	nih_assert (path);
@@ -767,17 +910,6 @@ bool set_value(const char *path, const char *value)
 
 	len = strlen(value);
 
-	for (i = 0; i < blacklist_len; i++) {
-		const char *p = strrchr(path, '/');
-		if (p)
-			p++;
-		else
-			p = path;
-		if (strcmp(p, set_value_blacklist[i]) == 0) {
-			nih_error("attempted write to %s", set_value_blacklist[i]);
-			return false;
-		}
-	}
 	if ((f = fopen(path, "w")) == NULL) {
 		nih_error("Error opening %s for writing", path);
 		return false;
@@ -796,6 +928,26 @@ bool set_value(const char *path, const char *value)
 	}
 	return true;
 }
+/* Write @value to @path via set_value_trusted() unless the file's basename is blacklisted. */
+bool set_value(const char *path, const char *value)
+{
+	const char *slash, *leaf;
+	size_t idx;
+
+	nih_assert (path);
+
+	/* only the final path component is compared against the blacklist */
+	slash = strrchr(path, '/');
+	leaf = slash ? slash + 1 : path;
+	for (idx = 0; idx < blacklist_len; idx++) {
+		if (strcmp(leaf, set_value_blacklist[idx]) != 0)
+			continue;
+		nih_error("attempted write to %s", set_value_blacklist[idx]);
+		return false;
+	}
+
+	return set_value_trusted(path, value);
+}
 
 /*
  * Tiny helper to read the /proc/pid/ns/pid link for a given pid.
@@ -851,22 +1003,6 @@ bool realpath_escapes(char *path, char *safety)
 	return false;
 }
 
-bool file_exists(const char *path)
-{
-	struct stat sb;
-	if (stat(path, &sb) < 0)
-		return false;
-	return true;
-}
-
-bool dir_exists(const char *path)
-{
-	struct stat sb;
-	if (stat(path, &sb) < 0 || !S_ISDIR(sb.st_mode))
-		return false;
-	return true;
-}
-
 /*
  * move_self_to_root: called by cgmanager at startup to make sure
  * it starts in /
diff --git a/fs.h b/fs.h
index d595613..8e60ef3 100644
--- a/fs.h
+++ b/fs.h
@@ -24,7 +24,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-int setup_cgroup_mounts(char *extra_mounts);
+int collect_subsystems(char *extra_mounts);
+int setup_cgroup_mounts(void);
 bool compute_pid_cgroup(pid_t pid, const char *controller, const char *cgroup,
 		char *path, int *depth);
 int get_path_depth(const char *path);
@@ -38,6 +39,7 @@ bool hostuid_to_ns(uid_t uid, pid_t pid, uid_t *answer);
 bool chown_cgroup_path(const char *path, uid_t uid, gid_t gid, bool all_children);
 bool chmod_cgroup_path(const char *path, int mode);
 bool set_value(const char *path, const char *value);
+bool set_value_trusted(const char *path, const char *value);
 unsigned long read_pid_ns_link(int pid);
 unsigned long read_user_ns_link(int pid);
 bool realpath_escapes(char *path, char *safety);
@@ -45,3 +47,5 @@ bool file_exists(const char *path);
 bool dir_exists(const char *path);
 bool move_self_to_root(void);
 int get_child_directories(void *parent, const char *path, char ***output);
+bool setup_base_run_path(void);
+bool create_agent_symlinks(void);
diff --git a/org.linuxcontainers.cgmanager.xml b/org.linuxcontainers.cgmanager.xml
index 619125b..d20af88 100644
--- a/org.linuxcontainers.cgmanager.xml
+++ b/org.linuxcontainers.cgmanager.xml
@@ -171,8 +171,16 @@
       <arg name="cgroup" type="s" direction="in" />
       <arg name="output" type="as" direction="out" />
     </method>
+    <method name="RemoveOnEmptyScm">
+      <arg name="controller" type="s" direction="in" />
+      <arg name="cgroup" type="s" direction="in" />
+      <arg name="sockfd" type="h" direction="in" />
+    </method>
+    <method name="RemoveOnEmpty">
+      <arg name="controller" type="s" direction="in" />
+      <arg name="cgroup" type="s" direction="in" />
+    </method>
     <!-- still to add: low priority,
-	 Prune (remove all empty decendents)
 	 removeWhenEmpty
 	 getEventfd
 	 -->
diff --git a/tests/test21.sh b/tests/test21.sh
new file mode 100755
index 0000000..9c11172
--- /dev/null
+++ b/tests/test21.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Checks that RemoveOnEmpty removes an emptied cgroup in the requested controller only.
+echo "Test 21: remove_on_empty"
+
+cg="test21_cg"
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Remove string:'memory' string:"$cg" || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Remove string:'devices' string:"$cg" || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Create string:'memory' string:"$cg" || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Create string:'devices' string:"$cg" || true
+sleep 200 &
+pid=$!
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.MovePid string:'memory' string:"$cg" int32:"$pid"
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.MovePid string:'devices' string:"$cg" int32:"$pid"
+
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:'memory' string:"$cg"
+
+kill "$pid"
+wait "$pid" 2>/dev/null; sleep 1	# the release agent runs asynchronously after the task exits
+# now $cg should be deleted in memory, but not in devices
+# note if logind or upstart has set this for us then this will raise a false positive
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.GetTasks string:'devices' string:"$cg" >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+	echo "Remove-on-empty affected another cgroup"
+	exit 1
+fi
+
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.GetTasks string:'memory' string:"$cg" >/dev/null 2>&1
+if [ $? -eq 0 ]; then
+	echo "Failed to remove-on-empty"
+	exit 1
+fi
+
+echo "Test 21 (remove_on_empty) passed"
+exit 0
-- 
1.9.1



More information about the cgmanager-devel mailing list