[cgmanager-devel] [PATCH 1/1] implement release-on-empty
Serge Hallyn
serge.hallyn at ubuntu.com
Sat Mar 29 01:25:20 UTC 2014
The new kernel mechanism for automatic removal of empty cgroups
is not yet available, therefore this uses the original
release-agent feature. The release-agent is only told the
cgroup name being deleted, not the controller.
Cgmanager installs a program cgm-release-agent into /sbin. It
then symlinks that to cgm-release-agent.$controller under
/run/cgmanager/agents and registers each symlink with the
related controller. The RemoveOnEmpty dbus call causes 1 to
be written to the notify_on_release file of the specified
cgroup, so that when the cgroup becomes empty the kernel runs
the matching cgm-release-agent.$controller symlink.
As a part of this the mounting of subsystems is broken up into
collecting the subsystems, then setting release agents, and
finally mounting the subsystems into our private namespace.
Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
.gitignore | 1 +
Makefile.am | 8 +-
acinclude.m4 | 137 +++++++++++++++++++
cgm | 15 ++
cgm-release-agent.c | 92 +++++++++++++
cgmanager-proxy.c | 51 +++++++
cgmanager.c | 70 +++++++++-
configure.ac | 2 +
frontend.c | 89 ++++++++++++
frontend.h | 5 +
fs.c | 278 ++++++++++++++++++++++++++++----------
fs.h | 6 +-
org.linuxcontainers.cgmanager.xml | 10 +-
tests/test21.sh | 34 +++++
14 files changed, 722 insertions(+), 76 deletions(-)
create mode 100644 acinclude.m4
create mode 100644 cgm-release-agent.c
create mode 100755 tests/test21.sh
diff --git a/.gitignore b/.gitignore
index 0a408cd..7decb58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,4 @@ cgm.1
tests/*.o
tests/cgm-concurrent
00*
+cgm-release-agent
diff --git a/Makefile.am b/Makefile.am
index 5d62ada..8023250 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -7,7 +7,7 @@ VERSION_AGE = 0
LIBCGMANAGER_VERSION = $(VERSION_CURRENT):$(VERSION_REVISION):$(VERSION_AGE)
LIBCGMANAGER_VERSION_DOTTED = $(VERSION_CURRENT).$(VERSION_REVISION).$(VERSION_AGE)
-AM_CFLAGS = -Wall -ggdb -D_GNU_SOURCE
+AM_CFLAGS = -Wall -ggdb -D_GNU_SOURCE -DSBINDIR=\"$(SBINDIR)\"
AM_CFLAGS += $(DBUS_CFLAGS) $(NIH_CFLAGS) $(NIH_DBUS_CFLAGS)
AM_LDFLAGS = $(DBUS_LIBS) $(NIH_LIBS) $(NIH_DBUS_LIBS)
client_files_OUTPUTS = cgmanager-client.c cgmanager-client.h
@@ -19,10 +19,11 @@ CLEANFILES = \
$(manager_files_OUTPUTS) \
$(client_files_OUTPUTS) \
cgmanager cgproxy \
+ cgm-release-agent \
*.o *.so \
libcgmanager.pc.in
-sbin_PROGRAMS = cgmanager cgproxy
+sbin_PROGRAMS = cgmanager cgproxy cgm-release-agent
bin_SCRIPTS = cgm
@@ -72,6 +73,9 @@ cgproxy_SOURCES = cgmanager-proxy.c \
fs.c fs.h cgmanager.h \
frontend.c frontend.h
+cgm_release_agent_SOURCES = cgm-release-agent.c
+cgm_release_agent_LDADD = -lcgmanager
+
libcgmanager_la_SOURCES = \
$(client_files_OUTPUTS) \
org.linuxcontainers.cgmanager.xml
diff --git a/acinclude.m4 b/acinclude.m4
new file mode 100644
index 0000000..d718b5e
--- /dev/null
+++ b/acinclude.m4
@@ -0,0 +1,137 @@
+dnl as-ac-expand.m4 0.2.0
+dnl autostars m4 macro for expanding directories using configure's prefix
+dnl thomas at apestaart.org
+dnl
+
+dnl AS_AC_EXPAND(VAR, CONFIGURE_VAR)
+dnl example
+dnl AS_AC_EXPAND(SYSCONFDIR, $sysconfdir)
+dnl will set SYSCONFDIR to /usr/local/etc if prefix=/usr/local
+
+dnl The macro temporarily replaces unset (NONE) prefix/exec_prefix values
+dnl with their defaults, repeatedly evals the given value until it stops
+dnl changing, AC_SUBSTs the result as VAR, and then restores the caller's
+dnl prefix and exec_prefix.
+AC_DEFUN([AS_AC_EXPAND],
+[
+ EXP_VAR=[$1]
+ FROM_VAR=[$2]
+
+ dnl first expand prefix and exec_prefix if necessary
+ prefix_save=$prefix
+ exec_prefix_save=$exec_prefix
+
+ dnl if no prefix given, then use /usr/local, the default prefix
+ if test "x$prefix" = "xNONE"; then
+ prefix="$ac_default_prefix"
+ fi
+ dnl if no exec_prefix given, then use prefix
+ if test "x$exec_prefix" = "xNONE"; then
+ exec_prefix=$prefix
+ fi
+
+ full_var="$FROM_VAR"
+ dnl loop until it doesn't change anymore
+ while true; do
+ new_full_var="`eval echo $full_var`"
+ if test "x$new_full_var" = "x$full_var"; then break; fi
+ full_var=$new_full_var
+ done
+
+ dnl clean up
+ full_var=$new_full_var
+ AC_SUBST([$1], "$full_var")
+
+ dnl restore prefix and exec_prefix
+ prefix=$prefix_save
+ exec_prefix=$exec_prefix_save
+])
+
+dnl Available from the GNU Autoconf Macro Archive at:
+dnl http://www.gnu.org/software/ac-archive/htmldoc/ax_compare_version.html
+dnl
+dnl AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+dnl Compares VERSION_A ($1) with VERSION_B ($3) using OP ($2), which is
+dnl one of lt, gt, le, ge, or eq/ne optionally followed by a subversion
+dnl count. The result is left in the shell variable ax_compare_version
+dnl ("true" or "false"); $4 is run when it is true, $5 otherwise.
+AC_DEFUN([AX_COMPARE_VERSION], [
+# Used to indicate true or false condition
+ax_compare_version=false
+ # Convert the two version strings to be compared into a format that
+ # allows a simple string comparison. The end result is that a version
+ # string of the form 1.12.5-r617 will be converted to the form
+ # 0001001200050617. In other words, each number is zero padded to four
+ # digits, and non digits are removed.
+ AS_VAR_PUSHDEF([A],[ax_compare_version_A])
+ A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
+ -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
+ -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+ -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+ -e 's/[[^0-9]]//g'`
+
+ AS_VAR_PUSHDEF([B],[ax_compare_version_B])
+ B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
+ -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
+ -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+ -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+ -e 's/[[^0-9]]//g'`
+
+ dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary
+ dnl # then the first line is used to determine if the condition is true.
+ dnl # The sed right after the echo is to remove any indented white space.
+ m4_case(m4_tolower($2),
+ [lt],[
+ ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"`
+ ],
+ [gt],[
+ ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"`
+ ],
+ [le],[
+ ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"`
+ ],
+ [ge],[
+ ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"`
+ ],[
+ dnl Split the operator from the subversion count if present.
+ m4_bmatch(m4_substr($2,2),
+ [0],[
+ # A count of zero means use the length of the shorter version.
+ # Determine the number of characters in A and B.
+ ax_compare_version_len_A=`echo "$A" | awk '{print(length)}'`
+ ax_compare_version_len_B=`echo "$B" | awk '{print(length)}'`
+
+ # Set A to no more than B's length and B to no more than A's length.
+ A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"`
+ B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"`
+ ],
+ [[0-9]+],[
+ # A count greater than zero means use only that many subversions
+ A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
+ B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
+ ],
+ [.+],[
+ AC_WARNING(
+ [illegal OP numeric parameter: $2])
+ ],[])
+
+ # Pad zeros at end of numbers to make same length.
+ ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`"
+ B="$B`echo $A | sed 's/./0/g'`"
+ A="$ax_compare_version_tmp_A"
+
+ # Check for equality or inequality as necessary.
+ m4_case(m4_tolower(m4_substr($2,0,2)),
+ [eq],[
+ test "x$A" = "x$B" && ax_compare_version=true
+ ],
+ [ne],[
+ test "x$A" != "x$B" && ax_compare_version=true
+ ],[
+ AC_WARNING([illegal OP parameter: $2])
+ ])
+ ])
+
+ AS_VAR_POPDEF([A])dnl
+ AS_VAR_POPDEF([B])dnl
+
+ dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE.
+ if test "$ax_compare_version" = "true" ; then
+ m4_ifvaln([$4],[$4],[:])dnl
+ m4_ifvaln([$5],[else $5])dnl
+ fi
+]) dnl AX_COMPARE_VERSION
diff --git a/cgm b/cgm
index 423cbc5..71e80cd 100755
--- a/cgm
+++ b/cgm
@@ -45,6 +45,8 @@ usage() {
echo
echo " $me listchildren <controller> <cgroup>"
echo
+ echo " $me removeonempty <controller> <cgroup>"
+ echo
echo " $me apiversion"
echo ""
echo " Replace '<controller>' with the desired controller, i.e."
@@ -168,6 +170,19 @@ case "$1" in
dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.ListChildren string:$2 string:$3
exit $?
;;
+	removeonempty)
+		# Ask cgmanager to remove <cgroup> once it becomes empty.
+		# With controller 'all', the request is sent once for every
+		# controller listed in /proc/cgroups.
+		if [ $# -lt 3 ]; then
+			usage $0
+		fi
+		# Remember the first failure so that an error on an early
+		# controller is not hidden by a later success.
+		rc=0
+		if [ "$2" = "all" ]; then
+			for cg in `awk '!/^#/ { print $1 }' /proc/cgroups`; do
+				dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:"$cg" string:"$3" || { [ $rc -ne 0 ] || rc=1; }
+			done
+		else
+			dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:"$2" string:"$3" || rc=$?
+		fi
+		exit $rc
+		;;
apiversion)
dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock /org/linuxcontainers/cgmanager org.freedesktop.DBus.Properties.Get string:'org.linuxcontainers.cgmanager0_0' string:'api_version'
exit $?
diff --git a/cgm-release-agent.c b/cgm-release-agent.c
new file mode 100644
index 0000000..b834465
--- /dev/null
+++ b/cgm-release-agent.c
@@ -0,0 +1,92 @@
+/* cgmanager
+ *
+ * Copyright © 2014 Canonical
+ * Author: Serge Hallyn <serge.hallyn at ubuntu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "cgmanager.h"
+#include "cgmanager-client.h"
+
+#include <nih-dbus/dbus_connection.h>
+#include "cgmanager-client.h"
+#include <nih/alloc.h>
+#include <nih/error.h>
+#include <nih/logging.h>
+#include <nih/string.h>
+
+#define CG_REMOVE_RECURSIVE 1
+
+/*
+ * do_remove_cgroup: ask cgmanager to remove @cgroup under @controller.
+ *
+ * Opens a private D-Bus connection to CGMANAGER_DBUS_PATH, creates a proxy
+ * for /org/linuxcontainers/cgmanager and issues a synchronous Remove call
+ * with CG_REMOVE_RECURSIVE set.
+ *
+ * Returns 0 if the Remove call succeeded, -1 on any failure (connection,
+ * proxy creation, or the Remove call itself).
+ */
+int do_remove_cgroup(const char *controller, const char *cgroup)
+{
+	DBusError dbus_error;
+	DBusConnection *connection;
+	NihDBusProxy *cgroup_manager = NULL;
+	NihError *nerr;
+	int existed = 0;
+	int ret = 0;
+
+	dbus_error_init(&dbus_error);
+
+	connection = dbus_connection_open_private(CGMANAGER_DBUS_PATH, &dbus_error);
+	if (!connection) {
+		nih_error("Failed opening dbus connection: %s: %s",
+				dbus_error.name, dbus_error.message);
+		dbus_error_free(&dbus_error);
+		return -1;
+	}
+	dbus_error_free(&dbus_error);
+
+	if (nih_dbus_setup(connection, NULL) < 0) {
+		nerr = nih_error_get();
+		nih_error("Failed setting up dbus connection: %s", nerr->message);
+		nih_free(nerr);
+		dbus_connection_unref(connection);
+		return -1;
+	}
+
+	cgroup_manager = nih_dbus_proxy_new(NULL, connection,
+				NULL /* p2p */,
+				"/org/linuxcontainers/cgmanager", NULL, NULL);
+	/* NOTE(review): presumably the proxy holds its own reference to the
+	 * connection, so ours can be dropped here - confirm. */
+	dbus_connection_unref(connection);
+	if (!cgroup_manager) {
+		nerr = nih_error_get();
+		nih_error("Failed creating cgmanager proxy: %s", nerr->message);
+		nih_free(nerr);
+		return -1;
+	}
+
+	if (cgmanager_remove_sync(NULL, cgroup_manager, controller,
+				cgroup, CG_REMOVE_RECURSIVE, &existed) != 0) {
+		/* Report the failure instead of silently returning success. */
+		nerr = nih_error_get();
+		nih_error("Failed to remove %s:%s: %s", controller, cgroup,
+				nerr->message);
+		nih_free(nerr);
+		ret = -1;
+	}
+	nih_free(cgroup_manager);
+	return ret;
+}
+
+/*
+ * Entry point of the release agent. The program is expected to be run
+ * through a symlink named cgm-release-agent.<controller>, with the cgroup
+ * to remove passed as argv[1]. The controller is taken from the text after
+ * the first '.' in the file-name part of argv[0].
+ *
+ * Returns the result of do_remove_cgroup(), or -1 if the cgroup argument
+ * is missing or no controller suffix can be found.
+ */
+int main(int argc, char *argv[])
+{
+	const char *name, *dot;
+
+	/* Fail cleanly rather than abort when called without a cgroup. */
+	if (argc < 2 || argv[1] == NULL)
+		return -1;
+
+	/* Only inspect the last path component, so that a '.' in a
+	 * directory name (e.g. "./" or "/run/foo.d/") is not mistaken
+	 * for the controller separator. */
+	name = strrchr(argv[0], '/');
+	name = name ? name + 1 : argv[0];
+
+	dot = strchr(name, '.');
+	if (!dot || dot[1] == '\0')
+		return -1;
+
+	/* controller is now in *(dot+1), cgroup is in argv[1] */
+	return do_remove_cgroup(dot + 1, argv[1]);
+}
diff --git a/cgmanager-proxy.c b/cgmanager-proxy.c
index d6e2113..49f6343 100644
--- a/cgmanager-proxy.c
+++ b/cgmanager-proxy.c
@@ -837,6 +837,57 @@ out:
return ret;
}
+/*
+ * Proxy-side handler for RemoveOnEmpty: forwards the request to the
+ * "RemoveOnEmptyScm" method.
+ *
+ * The request is refused unless the proxy credentials @p and requestor
+ * credentials @r are byte-for-byte identical and @cgroup passes
+ * sane_cgroup(). @controller, @cgroup and the socket sv[1] are appended
+ * as the method arguments.
+ *
+ * Returns 0 if a single byte '1' is received back on sv[0], -1 otherwise.
+ */
+int remove_on_empty_main (const char *controller, const char *cgroup,
+ struct ucred p, struct ucred r)
+{
+ DBusMessage *message;
+ DBusMessageIter iter;
+ int sv[2], ret = -1;
+ char buf[1];
+
+ if (memcmp(&p, &r, sizeof(struct ucred)) != 0) {
+ nih_error("%s: proxy != requestor", __func__);
+ return -1;
+ }
+
+ if (!sane_cgroup(cgroup)) {
+ nih_error("%s: unsafe cgroup", __func__);
+ return -1;
+ }
+
+ /* NOTE(review): sv is presumably filled in by start_dbus_request();
+ * on failure we return without closing it - confirm that is right. */
+ if (!(message = start_dbus_request("RemoveOnEmptyScm", sv))) {
+ nih_error("%s: error starting dbus request", __func__);
+ return -1;
+ }
+
+ dbus_message_iter_init_append(message, &iter);
+ if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_STRING, &controller)) {
+ nih_error("%s: out of memory", __func__);
+ goto out;
+ }
+ if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_STRING, &cgroup)) {
+ nih_error("%s: out of memory", __func__);
+ goto out;
+ }
+ if (! dbus_message_iter_append_basic (&iter, DBUS_TYPE_UNIX_FD, &sv[1])) {
+ nih_error("%s: out of memory", __func__);
+ goto out;
+ }
+
+ if (!complete_dbus_request(message, sv, &r, NULL)) {
+ nih_error("%s: error completing dbus request", __func__);
+ goto out;
+ }
+
+ /* The result arrives as one byte: '1' means success. */
+ if (recv(sv[0], buf, 1, 0) == 1 && (*buf == '1'))
+ ret = 0;
+out:
+ close(sv[0]);
+ close(sv[1]);
+ return ret;
+}
+
+
/**
* options:
*
diff --git a/cgmanager.c b/cgmanager.c
index 520e3b0..a532f47 100644
--- a/cgmanager.c
+++ b/cgmanager.c
@@ -666,6 +666,58 @@ int list_children_main(void *parent, const char *controller, const char *cgroup,
return get_child_directories(parent, path, output);
}
+/*
+ * Manager-side handler for RemoveOnEmpty: writes "1\n" to the
+ * notify_on_release file of @cgroup, taken relative to the requestor's
+ * (@r) current cgroup in @controller.
+ *
+ * The request is refused if @cgroup fails sane_cgroup(), if the requestor's
+ * cgroup cannot be computed, if the combined path is too long, if the
+ * target directory does not exist, or if may_access() denies @r write
+ * access to it. @p is not used in this function.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+int remove_on_empty_main(const char *controller, const char *cgroup,
+ struct ucred p, struct ucred r)
+{
+ char rcgpath[MAXPATHLEN];
+ size_t cgroup_len;
+ nih_local char *working = NULL, *wcgroup = NULL;
+
+ if (!sane_cgroup(cgroup)) {
+ nih_error("%s: unsafe cgroup", __func__);
+ return -1;
+ }
+
+ // Get r's current cgroup in rcgpath
+ if (!compute_pid_cgroup(r.pid, controller, "", rcgpath, NULL)) {
+ nih_error("%s: Could not determine the requested cgroup", __func__);
+ return -1;
+ }
+
+ cgroup_len = strlen(cgroup);
+
+ if (strlen(rcgpath) + cgroup_len > MAXPATHLEN) {
+ nih_error("%s: Path name too long", __func__);
+ return -1;
+ }
+
+ /* Work on a normalized private copy of the requested cgroup name. */
+ wcgroup = NIH_MUST( nih_strdup(NULL, cgroup) );
+ if (!normalize_path(wcgroup))
+ return -1;
+
+ /* working = <requestor's cgroup path>/<normalized cgroup> */
+ working = NIH_MUST( nih_strdup(NULL, rcgpath) );
+ NIH_MUST( nih_strcat(&working, NULL, "/") );
+ NIH_MUST( nih_strcat(&working, NULL, wcgroup) );
+
+ if (!dir_exists(working)) {
+ return -1;
+ }
+ // must have write access
+ if (!may_access(r.pid, r.uid, r.gid, working, O_WRONLY)) {
+ nih_error("%s: pid %d (%u:%u) may not remove %s", __func__,
+ r.pid, r.uid, r.gid, working);
+ return -1;
+ }
+
+ NIH_MUST( nih_strcat(&working, NULL, "/notify_on_release") );
+
+ /* set_value_trusted() is used for this write rather than set_value(). */
+ if (!set_value_trusted(working, "1\n")) {
+ nih_error("Failed to set remove_on_empty for %s:%s", controller, working);
+ return -1;
+ }
+
+ return 0;
+}
char *extra_cgroup_mounts;
static int
@@ -783,7 +835,23 @@ main (int argc, char *argv[])
client_disconnect);
nih_assert (server != NULL);
- if (setup_cgroup_mounts(extra_cgroup_mounts) < 0) {
+ if (!setup_base_run_path()) {
+ nih_fatal("Error setting up base cgroup path");
+ return -1;
+ }
+
+ if (collect_subsystems(extra_cgroup_mounts) < 0)
+ {
+ nih_fatal("failed to collect cgroup subsystems");
+ exit(1);
+ }
+
+ if (!create_agent_symlinks()) {
+ nih_fatal("Error creating release agent symlinks");
+ exit(1);
+ }
+
+ if (setup_cgroup_mounts() < 0) {
nih_fatal ("Failed to set up cgroup mounts");
exit(1);
}
diff --git a/configure.ac b/configure.ac
index ca458d1..8ddfa52 100644
--- a/configure.ac
+++ b/configure.ac
@@ -22,4 +22,6 @@ PKG_CHECK_MODULES([DBUS], [dbus-1 >= 1.2.16])
AC_PATH_PROG(HELP2MAN, help2man, false // No help2man //)
+AS_AC_EXPAND(SBINDIR, "$sbindir")
+
AC_OUTPUT
diff --git a/frontend.c b/frontend.c
index 42b913e..1b04fe8 100644
--- a/frontend.c
+++ b/frontend.c
@@ -76,6 +76,7 @@ static struct scm_sock_data *alloc_scm_sock_data(NihDBusMessage *message,
return d;
}
+#if 0
static const char *req_type_to_str(enum req_type r)
{
switch(r) {
@@ -90,9 +91,11 @@ static const char *req_type_to_str(enum req_type r)
case REQ_TYPE_GET_TASKS: return "get_tasks";
case REQ_TYPE_CHMOD: return "chmod";
case REQ_TYPE_LIST_CHILDREN: return "list_children";
+ case REQ_TYPE_REMOVE_ON_EMPTY: return "remove_on_empty";
default: return "invalid";
}
}
+#endif
/*
* All Scm-enhanced transactions take at least one SCM cred,
@@ -188,6 +191,7 @@ static void sock_scm_reader(struct scm_sock_data *data,
case REQ_TYPE_REMOVE: remove_scm_complete(data); break;
case REQ_TYPE_GET_TASKS: get_tasks_scm_complete(data); break;
case REQ_TYPE_LIST_CHILDREN: list_children_scm_complete(data); break;
+ case REQ_TYPE_REMOVE_ON_EMPTY: remove_on_empty_scm_complete(data); break;
default:
nih_fatal("%s: bad req_type %d", __func__, data->type);
exit(1);
@@ -1304,6 +1308,91 @@ int cgmanager_list_children (void *data, NihDBusMessage *message,
return ret;
}
+/*
+ * Completion step for a RemoveOnEmptyScm request: run the request with the
+ * controller, cgroup and credentials held in @data, then send the outcome
+ * to the client as a single byte on data->fd ('1' success, '0' failure).
+ */
+void remove_on_empty_scm_complete(struct scm_sock_data *data)
+{
+	int rc = remove_on_empty_main(data->controller, data->cgroup,
+			data->pcred, data->rcred);
+	char reply = (rc == 0) ? '1' : '0';
+
+	if (write(data->fd, &reply, 1) < 0)
+		nih_error("RemoveOnEmptyScm: Error writing final result to client");
+}
+
+/*
+ * D-Bus callback for RemoveOnEmptyScm. Records @controller and @cgroup in
+ * a new scm_sock_data of type REQ_TYPE_REMOVE_ON_EMPTY, registers @sockfd
+ * with sock_scm_reader as its reader, and then kicks the client.
+ *
+ * Returns 0 on success; on failure raises a D-Bus error and returns -1.
+ */
+int cgmanager_remove_on_empty_scm (void *data, NihDBusMessage *message,
+		const char *controller, const char *cgroup, int sockfd)
+{
+	struct scm_sock_data *req;
+
+	req = alloc_scm_sock_data(message, sockfd, REQ_TYPE_REMOVE_ON_EMPTY);
+	if (!req)
+		return -1;
+
+	req->controller = NIH_MUST( nih_strdup(req, controller) );
+	req->cgroup = NIH_MUST( nih_strdup(req, cgroup) );
+
+	if (!nih_io_reopen(NULL, sockfd, NIH_IO_MESSAGE,
+			(NihIoReader) sock_scm_reader,
+			(NihIoCloseHandler) scm_sock_close,
+			scm_sock_error_handler, req)) {
+		NihError *err = nih_error_steal ();
+
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"Failed queue scm message: %s", err->message);
+		nih_free(err);
+		return -1;
+	}
+
+	if (kick_fd_client(sockfd))
+		return 0;
+
+	nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+		"Error writing to client: %s", strerror(errno));
+	return -1;
+}
+
+/*
+ * D-Bus callback for RemoveOnEmpty.
+ * The caller asks for cgroup @cgroup in controller @controller to be
+ * marked for removal once it becomes empty, i.e. once it has no
+ * sub-cgroups and no tasks. The peer credentials of the client socket
+ * are used as both the proxy and the requestor credentials.
+ */
+int cgmanager_remove_on_empty (void *data, NihDBusMessage *message,
+		const char *controller, const char *cgroup)
+{
+	int fd = 0;
+	int ret;
+	struct ucred rcred;
+	socklen_t len = sizeof(struct ucred);
+
+	if (!message) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"message was null");
+		return -1;
+	}
+
+	if (!dbus_connection_get_socket(message->connection, &fd)) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"Could not get client socket.");
+		return -1;
+	}
+
+	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &rcred, &len) < 0) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"Could not get peer cred: %s",
+			strerror(errno));
+		return -1;
+	}
+
+	nih_info (_("RemoveOnEmpty: Client fd is: %d (pid=%d, uid=%u, gid=%u)"),
+		fd, rcred.pid, rcred.uid, rcred.gid);
+
+	ret = remove_on_empty_main(controller, cgroup, rcred, rcred);
+	if (ret < 0) {
+		nih_dbus_error_raise_printf (DBUS_ERROR_INVALID_ARGS,
+			"invalid request");
+		return ret;
+	}
+	return 0;
+}
+
int
cgmanager_get_api_version(void *data, NihDBusMessage *message, int *version)
{
diff --git a/frontend.h b/frontend.h
index 935231f..0c6c1f6 100644
--- a/frontend.h
+++ b/frontend.h
@@ -103,6 +103,7 @@ enum req_type {
REQ_TYPE_CHMOD,
REQ_TYPE_MOVE_PID_ABS,
REQ_TYPE_LIST_CHILDREN,
+ REQ_TYPE_REMOVE_ON_EMPTY,
};
int get_pid_cgroup_main(void *parent, const char *controller,
@@ -141,6 +142,10 @@ int list_children_main (void *parent, const char *controller, const char *cgroup
struct ucred p, struct ucred r, char ***output);
void list_children_scm_complete(struct scm_sock_data *data);
+int remove_on_empty_main (const char *controller, const char *cgroup,
+ struct ucred p, struct ucred r);
+void remove_on_empty_scm_complete(struct scm_sock_data *data);
+
int cgmanager_ping (void *data, NihDBusMessage *message, int junk);
int client_connect (DBusServer *server, DBusConnection *conn);
diff --git a/fs.c b/fs.c
index e4bccbd..c245964 100644
--- a/fs.c
+++ b/fs.c
@@ -49,16 +49,39 @@
#include <nih-dbus/dbus_connection.h>
#include <nih-dbus/dbus_proxy.h>
+/* defines relating to the release agent */
+#define AGENT SBINDIR "/cgm-release-agent"
+#define AGENT_LINK_PATH "/run/cgmanager/agents"
+
struct controller_mounts {
char *controller;
char *options;
char *path;
+ char *src;
+ char *agent;
};
static struct controller_mounts *all_mounts;
static int num_controllers;
static char *base_path;
+
+/* Return true if stat() succeeds on @path, false otherwise. */
+bool file_exists(const char *path)
+{
+	struct stat info;
+
+	return stat(path, &info) >= 0;
+}
+
+/* Return true if stat() succeeds on @path and it is a directory. */
+bool dir_exists(const char *path)
+{
+	struct stat info;
+
+	if (stat(path, &info) < 0)
+		return false;
+	return S_ISDIR(info.st_mode) ? true : false;
+}
+
/*
* Where do we want to mount the controllers? We used to mount
* them under a tmpfs under /sys/fs/cgroup, for all to share. Now
@@ -66,7 +89,7 @@ static char *base_path;
* TODO read this from configuration file too
* TODO do we want to create these in a tmpfs?
*/
-static bool setup_base_path(void)
+bool setup_base_run_path(void)
{
base_path = strdup("/run/cgmanager/fs");
if (!base_path) {
@@ -85,6 +108,10 @@ static bool setup_base_path(void)
nih_fatal("%s: failed to create /run/cgmanager/fs", __func__);
return false;
}
+ if (mkdir(AGENT_LINK_PATH, 0755) < 0 && errno != EEXIST) {
+ nih_fatal("%s: failed to create %s", __func__, AGENT_LINK_PATH);
+ return false;
+ }
return true;
}
@@ -128,7 +155,7 @@ static void set_use_hierarchy(const char *path)
fclose(f);
}
-static bool do_mount_subsys(char *s)
+static bool save_mount_subsys(char *s)
{
struct controller_mounts *tmp;
char *src, dest[MAXPATHLEN], *controller;
@@ -157,15 +184,6 @@ static bool do_mount_subsys(char *s)
ret = -1;
goto out;
}
- if (mkdir(dest, 0755) < 0 && errno != EEXIST) {
- nih_fatal("Failed to create %s: %s", dest, strerror(errno));
- ret = -1;
- goto out;
- }
- if ((ret = mount(src, dest, "cgroup", 0, src)) < 0) {
- nih_fatal("Failed mounting %s: %s", s, strerror(errno));
- goto out;
- }
ret = -1;
tmp = realloc(all_mounts, (num_controllers+1) * sizeof(*all_mounts));
if (!tmp) {
@@ -180,58 +198,99 @@ static bool do_mount_subsys(char *s)
}
all_mounts[num_controllers].options = NULL;
all_mounts[num_controllers].path = strdup(dest);
- if (!all_mounts[num_controllers].path) {
+ all_mounts[num_controllers].src = strdup(src);
+ if (!all_mounts[num_controllers].path ||
+ !all_mounts[num_controllers].src) {
nih_fatal("Out of memory mounting controllers");
goto out;
}
nih_info(_("Mounted %s onto %s"),
all_mounts[num_controllers].controller,
all_mounts[num_controllers].path);
- if (strcmp(all_mounts[num_controllers].controller, "cpuset") == 0) {
+ num_controllers++;
+ return true;
+
+out:
+ return false;
+}
+
+/*
+ * Register m->agent as the release agent of the hierarchy mounted at
+ * m->path by writing it to <m->path>/release_agent.
+ *
+ * Assumes m->agent has already been filled in (create_agent_symlinks()
+ * does this) - TODO confirm no other caller reaches here first.
+ *
+ * Returns true on success, false if the path is too long or the file
+ * cannot be opened, written or closed.
+ */
+static bool set_release_agent(struct controller_mounts *m)
+{
+	FILE *f;
+	char path[MAXPATHLEN];
+	int ret;
+
+	ret = snprintf(path, MAXPATHLEN, "%s/release_agent", m->path);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		/* snprintf truncation is a length problem, not an allocation one */
+		nih_error("path name too long for release agent of %s",
+				m->controller);
+		return false;
+	}
+	if ((f = fopen(path, "w")) == NULL) {
+		nih_error("failed to open %s for writing: %s", path,
+				strerror(errno));
+		return false;
+	}
+	if (fprintf(f, "%s\n", m->agent) < 0) {
+		nih_error("failed to set release agent for %s",
+				m->controller);
+		fclose(f);
+		return false;
+	}
+	/* fclose() flushes, so a write error may only show up here */
+	if (fclose(f) != 0) {
+		nih_error("failed to set release agent for %s",
+				m->controller);
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Mount the controller recorded in all_mounts[i].
+ *
+ * Creates the destination directory (an existing one is accepted), mounts
+ * a "cgroup" filesystem there using the saved src string as both mount
+ * source and mount data, applies the cpuset/memory specific settings and
+ * finally registers the release agent for the mount.
+ *
+ * Returns true on success, false on any failure.
+ */
+static bool do_mount_subsys(int i)
+{
+ char *src, *dest, *controller;
+ int ret;
+
+ dest = all_mounts[i].path;
+ controller = all_mounts[i].controller;
+ src = all_mounts[i].src;
+
+ if (mkdir(dest, 0755) < 0 && errno != EEXIST) {
+ nih_fatal("Failed to create %s: %s", dest, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ if ((ret = mount(src, dest, "cgroup", 0, src)) < 0) {
+ nih_fatal("Failed mounting %s: %s", dest, strerror(errno));
+ goto out;
+ }
+ nih_info(_("Mounted %s onto %s"), controller, dest);
+ if (strcmp(controller, "cpuset") == 0) {
set_clone_children(dest); // TODO make this optional?
nih_info(_("set clone_children"));
- } else if (strcmp(all_mounts[num_controllers].controller, "memory") == 0) {
+ } else if (strcmp(controller, "memory") == 0) {
set_use_hierarchy(dest); // TODO make this optional?
nih_info(_("set memory.use_hierarchy"));
}
- num_controllers++;
+
+ /* The release agent is set only after the hierarchy is mounted. */
+ if (!set_release_agent(&all_mounts[i])) {
+ nih_error("failed to set release agent for %s",
+ all_mounts[i].controller);
+ return false;
+ }
return true;
out:
return false;
}
-/**
- * Mount the cgroup filesystems and record the information.
- * This should take configuration data from /etc. For now,
- * Just mount all controllers, separately just as cgroup-lite
- * does, and set the use_hierarchy and clone_children options.
- *
- * Things which should go into configuration file:
- * . which controllers to mount
- * . which controllers to co-mount
- * . any mount options (per-controller)
- * . values for sane_behavior, use_hierarchy, and clone_children
- */
-int setup_cgroup_mounts(char *extra_mounts)
+int collect_subsystems(char *extra_mounts)
{
FILE *cgf;
int ret;
char line[400];
- if (unshare(CLONE_NEWNS) < 0) {
- nih_fatal("Failed to unshare a private mount ns: %s", strerror(errno));
- return -1;
- }
- if (!setup_base_path()) {
- nih_fatal("Error setting up base cgroup path");
- return -1;
- }
-
if (extra_mounts) {
char *e;
for (e = strtok(extra_mounts, ","); e; e = strtok(NULL, ",")) {
- if (!do_mount_subsys(e)) {
+ if (!save_mount_subsys(e)) {
nih_fatal("Error loading subsystem \"%s\"", e);
return -1;
}
@@ -261,19 +320,99 @@ int setup_cgroup_mounts(char *extra_mounts)
#endif
}
- if (!do_mount_subsys(line)) {
- nih_fatal("Error mounting subsystem %s", line);
+ if (!save_mount_subsys(line)) {
+ nih_fatal("Error storing subsystem %s", line);
ret = -1;
goto out;
}
}
- nih_info(_("mounted %d controllers"), num_controllers);
+ nih_info(_("found %d controllers"), num_controllers);
ret = 0;
out:
fclose(cgf);
return ret;
}
+/**
+ * Mount the cgroup filesystems and record the information.
+ * This should take configuration data from /etc. For now,
+ * Just mount all controllers, separately just as cgroup-lite
+ * does, and set the use_hierarchy and clone_children options.
+ *
+ * Things which should go into configuration file:
+ * . which controllers to mount
+ * . which controllers to co-mount
+ * . any mount options (per-controller)
+ * . values for sane_behavior, use_hierarchy, and clone_children
+ *
+ * The controllers mounted are the num_controllers entries already stored
+ * in all_mounts; they are mounted after unsharing the mount namespace.
+ *
+ * Returns 0 on success, -1 if the mount namespace could not be unshared
+ * or any controller failed to mount.
+ */
+int setup_cgroup_mounts(void)
+{
+	int i;
+
+	if (unshare(CLONE_NEWNS) < 0) {
+		nih_fatal("Failed to unshare a private mount ns: %s", strerror(errno));
+		/* must be negative: the caller only treats < 0 as failure */
+		return -1;
+	}
+
+	for (i = 0; i < num_controllers; i++) {
+		if (!do_mount_subsys(i)) {
+			nih_fatal("Failed mounting cgroups");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * In the old release agent support, the release agent is not told the
+ * controller in which the cgroup was freed. Therefore we need to have a
+ * different binary for each mounted controller. We will create these under
+ * /run/cgmanager/agents/ as symlinks to /sbin/cgm-release-agent, i.e.
+ * /run/cgmanager/agents/cgm-release-agent.freezer.
+ *
+ * For each entry in all_mounts the symlink is created if nothing exists at
+ * that path yet, and the link path is stored in the entry's agent field.
+ *
+ * Returns true on success, false if AGENT is missing, a path does not fit
+ * in MAXPATHLEN, a symlink cannot be created, or memory runs out.
+ */
+bool create_agent_symlinks(void)
+{
+	struct stat statbuf;
+	char buf[MAXPATHLEN];
+	int i, ret, plen;
+
+	ret = stat(AGENT, &statbuf);
+	if (ret < 0) {
+		nih_error("release agent not found");
+		return false;
+	}
+
+	plen = snprintf(buf, MAXPATHLEN, "%s/", AGENT_LINK_PATH);
+	if (plen < 0 || plen >= MAXPATHLEN) {
+		nih_error("memory error");
+		return false;
+	}
+
+	for (i = 0; i < num_controllers; i++) {
+		ret = snprintf(buf+plen, MAXPATHLEN-plen, "cgm-release-agent.%s",
+				all_mounts[i].controller);
+		/* Only MAXPATHLEN-plen bytes were available, so that is the
+		 * limit to test against to detect truncation. */
+		if (ret < 0 || ret >= MAXPATHLEN-plen) {
+			nih_error("path names too long");
+			return false;
+		}
+		nih_info("buf is %s", buf);
+		if (!file_exists(buf)) {
+			if (symlink(AGENT, buf) < 0) {
+				nih_error("failed to create release agent for %s",
+					all_mounts[i].controller);
+				return false;
+			}
+		}
+		if ((all_mounts[i].agent = strdup(buf)) == NULL) {
+			nih_error("out of memory");
+			return false;
+		}
+	}
+
+	return true;
+}
+
static inline void drop_newlines(char *s)
{
int l;
@@ -752,12 +891,16 @@ bool chmod_cgroup_path(const char *path, int mode)
/*
* TODO - make this more baroque to allow ranges etc
*/
-static char *set_value_blacklist[] = { "tasks", "release-agent", "cgroup.procs" };
+/*
+ * File names (last path component) that set_value() refuses to write.
+ * The names use underscores because the files written elsewhere in this
+ * patch are "release_agent" and "notify_on_release"; hyphenated spellings
+ * would never match them.
+ */
+static char *set_value_blacklist[] = { "tasks",
+	"release_agent",
+	"cgroup.procs",
+	"notify_on_release"
+};
static size_t blacklist_len = sizeof(set_value_blacklist)/sizeof(char *);
-bool set_value(const char *path, const char *value)
+bool set_value_trusted(const char *path, const char *value)
{
- int i, len;
+ int len;
FILE *f;
nih_assert (path);
@@ -767,17 +910,6 @@ bool set_value(const char *path, const char *value)
len = strlen(value);
- for (i = 0; i < blacklist_len; i++) {
- const char *p = strrchr(path, '/');
- if (p)
- p++;
- else
- p = path;
- if (strcmp(p, set_value_blacklist[i]) == 0) {
- nih_error("attempted write to %s", set_value_blacklist[i]);
- return false;
- }
- }
if ((f = fopen(path, "w")) == NULL) {
nih_error("Error opening %s for writing", path);
return false;
@@ -796,6 +928,26 @@ bool set_value(const char *path, const char *value)
}
return true;
}
+/*
+ * Write @value to @path, unless the last component of @path is listed in
+ * set_value_blacklist, in which case the write is refused.
+ * Returns false for a refused write, else the set_value_trusted() result.
+ */
+bool set_value(const char *path, const char *value)
+{
+	const char *leaf;
+	int idx;
+
+	nih_assert (path);
+
+	/* the file name is whatever follows the final '/' (or all of path) */
+	leaf = strrchr(path, '/');
+	leaf = leaf ? leaf + 1 : path;
+
+	for (idx = 0; idx < blacklist_len; idx++) {
+		if (strcmp(leaf, set_value_blacklist[idx]) != 0)
+			continue;
+		nih_error("attempted write to %s", set_value_blacklist[idx]);
+		return false;
+	}
+
+	return set_value_trusted(path, value);
+}
/*
* Tiny helper to read the /proc/pid/ns/pid link for a given pid.
@@ -851,22 +1003,6 @@ bool realpath_escapes(char *path, char *safety)
return false;
}
-bool file_exists(const char *path)
-{
- struct stat sb;
- if (stat(path, &sb) < 0)
- return false;
- return true;
-}
-
-bool dir_exists(const char *path)
-{
- struct stat sb;
- if (stat(path, &sb) < 0 || !S_ISDIR(sb.st_mode))
- return false;
- return true;
-}
-
/*
* move_self_to_root: called by cgmanager at startup to make sure
* it starts in /
diff --git a/fs.h b/fs.h
index d595613..8e60ef3 100644
--- a/fs.h
+++ b/fs.h
@@ -24,7 +24,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-int setup_cgroup_mounts(char *extra_mounts);
+int collect_subsystems(char *extra_mounts);
+int setup_cgroup_mounts(void);
bool compute_pid_cgroup(pid_t pid, const char *controller, const char *cgroup,
char *path, int *depth);
int get_path_depth(const char *path);
@@ -38,6 +39,7 @@ bool hostuid_to_ns(uid_t uid, pid_t pid, uid_t *answer);
bool chown_cgroup_path(const char *path, uid_t uid, gid_t gid, bool all_children);
bool chmod_cgroup_path(const char *path, int mode);
bool set_value(const char *path, const char *value);
+bool set_value_trusted(const char *path, const char *value);
unsigned long read_pid_ns_link(int pid);
unsigned long read_user_ns_link(int pid);
bool realpath_escapes(char *path, char *safety);
@@ -45,3 +47,5 @@ bool file_exists(const char *path);
bool dir_exists(const char *path);
bool move_self_to_root(void);
int get_child_directories(void *parent, const char *path, char ***output);
+bool setup_base_run_path(void);
+bool create_agent_symlinks(void);
diff --git a/org.linuxcontainers.cgmanager.xml b/org.linuxcontainers.cgmanager.xml
index 619125b..d20af88 100644
--- a/org.linuxcontainers.cgmanager.xml
+++ b/org.linuxcontainers.cgmanager.xml
@@ -171,8 +171,16 @@
<arg name="cgroup" type="s" direction="in" />
<arg name="output" type="as" direction="out" />
</method>
+ <method name="RemoveOnEmptyScm">
+ <arg name="controller" type="s" direction="in" />
+ <arg name="cgroup" type="s" direction="in" />
+ <arg name="sockfd" type="h" direction="in" />
+ </method>
+ <method name="RemoveOnEmpty">
+ <arg name="controller" type="s" direction="in" />
+ <arg name="cgroup" type="s" direction="in" />
+ </method>
<!-- still to add: low priority,
- Prune (remove all empty decendents)
removeWhenEmpty
getEventfd
-->
diff --git a/tests/test21.sh b/tests/test21.sh
new file mode 100755
index 0000000..9c11172
--- /dev/null
+++ b/tests/test21.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+echo "Test 21: remove_on_empty"
+
+cg="test21_cg"
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Remove string:'memory' string:$cg || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Remove string:'devices' string:$cg || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Create string:'memory' string:$cg || true
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.Create string:'devices' string:$cg || true
+sleep 200 &
+pid=$!
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.MovePid string:'memory' string:$cg int32:$pid
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.MovePid string:'devices' string:$cg int32:$pid
+
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.RemoveOnEmpty string:'memory' string:$cg
+
+kill $pid
+
+# now $cg should be deleted in memory, but not in devices
+# note if logind or upstart has set this for us then this will raise a false positive
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.GetTasks string:'devices' string:$cg >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+ echo "Remove-on-empty affected another cgroup"
+ exit 1
+fi
+
+dbus-send --print-reply --address=unix:path=/sys/fs/cgroup/cgmanager/sock --type=method_call /org/linuxcontainers/cgmanager org.linuxcontainers.cgmanager0_0.GetTasks string:'memory' string:$cg >/dev/null 2>&1
+if [ $? -eq 0 ]; then
+ echo "Failed to remove-on-empty"
+ exit 1
+fi
+
+echo "Test 21 (remove_on_empty) passed"
+exit 0
--
1.9.1
More information about the cgmanager-devel
mailing list