[lxc-devel] [RFC 0/5] stop hook with namespace access
Serge Hallyn
serge.hallyn at ubuntu.com
Mon Sep 28 15:14:20 UTC 2015
Quoting Wolfgang Bumiller (w.bumiller at proxmox.com):
> Just a quick followup:
What about actually shipping this in /usr/share/lxc/hooks/
from lxc/hooks?
> I thought I'd drop a stop hook example here and a reason for why it's
> useful.
>
> As I mentioned, it's mostly about unmounting NFS mountpoints. However,
> you could say that that's not usually an issue as the container
> usually performs a (hopefully) clean shutdown sequence which should
> unmount the filesystem anyway (or maybe just read-only remount it).
> And yes, most of the time when a container shuts down its shutdown
> sequence's unmount/sync commands would hang anyway, so why bother with
> the stop hook?
>
> Two examples: 1) Some systems seem to default to force-unmounting, and
> the seccomp policy forbids this to avoid shooting lxcfs in the back.
> Eg. on a standard centos template I seem to have *always* lost my
> lxcfs on shutdown of a container until I started using the seccomp
> policy to forbid force-unmounting. The side effect of this is that the
> unmount call EPERMs and no unmounting is performed. This is where
> network delays in NFS can become an issue for us.
>
> 2) Assume a system running SystemD as init. Now send a SIGSEGV to pid
> 1 (yes, you're actually allowed to do that because it installs a
> SIGSEGV handler, and according to kill(2) you can send signals to pid
> 1 provided such a signal handler was actually installed). SystemD
> handles a first such signal by freezing itself with a nice log entry
> about this - I assume this is to not kill the whole system while
> giving you a chance to "deal". Sending yet another SIGSEGV finally
> kills it off.
> So do another `kill -11 1` and the system's gone without doing any
> kind of shutdown sequence.
>
> I have attached a stop-hook.c that's supposed to unmount all
> filesystems inside the container after shutdown.
> /* vim: set ts=2 sts=2 sw=2 et: */
> #define _GNU_SOURCE /* setns */
> #include <stdio.h> /* fdopen, getmntent, endmntent */
> #include <stdlib.h> /* malloc, qsort */
> #include <unistd.h> /* close */
> #include <string.h> /* strcmp, strncmp, strdup, strerror */
> #include <sched.h> /* setns */
> #include <sys/mount.h> /* mount, umount2 */
> #include <sys/types.h> /* openat, open */
> #include <sys/stat.h> /* openat, open */
> #include <fcntl.h> /* openat, open */
> #include <mntent.h> /* getmntent, endmntent */
> #include <errno.h> /* errno */
>
> typedef struct { /* one mount-table entry as copied by read_mounts() */
> char *src; /* copy of the entry's mnt_fsname; not used */
> char *dst; /* copy of the entry's mnt_dir: the path that gets sorted and unmounted */
> char *fs; /* copy of the entry's mnt_type; not used */
> } Mount;
>
> /* Release the three strings held by a Mount entry.
>  * The Mount structure itself is not freed here.
>  */
> static void Mount_free(Mount *mnt) {
>   free(mnt->fs);
>   free(mnt->dst);
>   free(mnt->src);
> }
>
> /* qsort(3) comparator: orders Mount entries by 'dst' in descending strcmp()
>  * order. In descending order a path sorts ahead of any of its prefixes, so a
>  * mount point nested below another one comes before its parent.
>  *
>  * The elements are only read, so they are accessed through const pointers
>  * instead of casting the qualifier away.
>  */
> static int Mount_cmp_dst(const void *a_, const void *b_) {
>   const Mount *a = a_;
>   const Mount *b = b_;
>
>   return strcmp(b->dst, a->dst); /* swapped order */
> }
>
> /* Decide whether a failed unmount of this entry is worth reporting.
>  *
>  * Unmounting /dev/pts fails, and so /dev also fails, but /dev is not what
>  * we're interested in. Returns 0 for "/dev" itself and for any path below
>  * "/dev/", and 1 for every other destination.
>  */
> static int Mount_should_error(const Mount *mnt) {
>   const char *path = mnt->dst;
>
>   /* Anything that does not even start with "/dev" is always reported. */
>   if (strncmp(path, "/dev", 4) != 0) {
>     return 1;
>   }
>
>   /* "/dev" and "/dev/..." are silenced; "/devfoo" and the like are not. */
>   return path[4] != '\0' && path[4] != '/';
> }
>
> /* Read mounts from 'self/mounts' relative to a directory filedescriptor.
>  * Before entering the container we open a handle to /proc on the host as we
>  * need to access /proc/self/mounts and the container's /proc doesn't contain
>  * our /self. We then use openat(2) to avoid having to mount a temporary /proc.
>  *
>  * procfd: directory file descriptor that 'self/mounts' is resolved against.
>  * mp:     receives a malloc()ed array of Mount entries on success, NULL on
>  *         failure. The caller owns the array and the strings in each entry.
>  * countp: receives the number of entries in *mp (0 on failure).
>  *
>  * Returns 1 on success and 0 on failure with errno describing the error.
>  * On failure nothing is leaked: the partially built array, its strings and
>  * the open stream are all released before returning.
>  */
> static int read_mounts(int procfd, Mount **mp, size_t *countp) {
>   int fd;
>   int saved_errno;
>   struct mntent *ent;
>   FILE *mf;
>   size_t capacity = 32;
>   size_t count = 0;
>   size_t zi;
>   Mount *mounts = NULL;
>
>   *mp = NULL;
>   *countp = 0;
>
>   fd = openat(procfd, "self/mounts", O_RDONLY);
>   if (fd < 0)
>     return 0;
>
>   mf = fdopen(fd, "r");
>   if (!mf) {
>     saved_errno = errno;
>     close(fd);
>     errno = saved_errno;
>     return 0;
>   }
>
>   /* Allocate only once the stream is open so the early returns above have
>    * nothing to clean up.
>    */
>   mounts = malloc(capacity * sizeof(*mounts));
>   if (!mounts) {
>     errno = ENOMEM;
>     goto fail;
>   }
>
>   while ((ent = getmntent(mf))) {
>     Mount *slot;
>
>     if (count == capacity) {
>       Mount *grown;
>
>       /* Refuse to double if the byte size would overflow size_t. */
>       if (capacity > (size_t)-1 / 2 / sizeof(*mounts)) {
>         errno = ENOMEM;
>         goto fail;
>       }
>       /* Keep the old pointer until realloc() succeeded, otherwise the
>        * array would be lost on failure.
>        */
>       grown = realloc(mounts, capacity * 2 * sizeof(*mounts));
>       if (!grown) {
>         errno = ENOMEM;
>         goto fail;
>       }
>       mounts = grown;
>       capacity *= 2;
>     }
>
>     slot = &mounts[count];
>     slot->src = strdup(ent->mnt_fsname);
>     slot->dst = strdup(ent->mnt_dir);
>     slot->fs = strdup(ent->mnt_type);
>     /* Count the slot before checking it so that the failure path frees
>      * whichever of the three copies did succeed.
>      */
>     ++count;
>     if (!slot->src || !slot->dst || !slot->fs) {
>       errno = ENOMEM;
>       goto fail;
>     }
>   }
>   endmntent(mf);
>
>   *mp = mounts;
>   *countp = count;
>
>   return 1;
>
> fail:
>   /* Preserve the error across the cleanup calls for the caller's message. */
>   saved_errno = errno;
>   for (zi = 0; zi != count; ++zi)
>     Mount_free(&mounts[zi]);
>   free(mounts);
>   endmntent(mf);
>   errno = saved_errno;
>   return 0;
> }
>
> /* LXC hook entry point: on the "stop" hook, enter the container's mount
>  * namespace and try to unmount every filesystem listed in its mount table.
>  *
>  * Expected arguments: argv[2] must be "lxc", argv[3] is the hook name, and
>  * one of the remaining arguments must have the form "mnt:<path>", naming the
>  * mount namespace file to enter (the last such argument wins).
>  *
>  * Exit status: 0 on success or when the hook is not "stop", 2 for a usage
>  * error, 3 when no mount namespace was given, 4 when /proc cannot be opened,
>  * 5 when the namespace file cannot be opened, 6 when setns() fails and 7 when
>  * the mount table cannot be read. Individual unmount failures are reported
>  * on stderr but do not change the exit status.
>  */
> int main(int argc, char **argv) {
>   const char *self = argv[0];
>   const char *ns_path = NULL;
>   Mount *table;
>   size_t n_mounts = 0;
>   size_t idx;
>   int arg, host_proc, ns_fd;
>
>   if (argc < 4 || strcmp(argv[2], "lxc") != 0) {
>     fprintf(stderr, "%s: usage error, expected LXC hook arguments\n", self);
>     return 2;
>   }
>
>   /* Every hook type other than "stop" is silently accepted. */
>   if (strcmp(argv[3], "stop") != 0) {
>     return 0;
>   }
>
>   /* Pick up the mount namespace path from the "mnt:" argument. */
>   for (arg = 4; arg < argc; ++arg) {
>     if (strncmp(argv[arg], "mnt:", 4) == 0) {
>       ns_path = argv[arg] + 4;
>     }
>   }
>
>   if (ns_path == NULL) {
>     fprintf(stderr, "%s: no mount namespace provided\n", self);
>     return 3;
>   }
>
>   /* Grab the host's /proc before switching namespaces: the mount table is
>    * later read through this handle, see read_mounts().
>    */
>   host_proc = open("/proc", O_RDONLY | O_DIRECTORY | O_PATH);
>   if (host_proc < 0) {
>     fprintf(stderr, "%s: failed to open /proc: %s\n", self, strerror(errno));
>     return 4;
>   }
>
>   /* Open the namespace file ... */
>   ns_fd = open(ns_path, O_RDONLY);
>   if (ns_fd < 0) {
>     fprintf(stderr, "%s: failed to open mount namespace: %s\n",
>             self, strerror(errno));
>     close(host_proc);
>     return 5;
>   }
>
>   /* ... and switch this process into it. */
>   if (setns(ns_fd, CLONE_NEWNS) != 0) {
>     fprintf(stderr, "%s: failed to attach to namespace: %s\n",
>             self, strerror(errno));
>     close(ns_fd);
>     close(host_proc);
>     return 6;
>   }
>   close(ns_fd);
>
>   /* Read self/mounts through the /proc handle opened above. */
>   if (!read_mounts(host_proc, &table, &n_mounts)) {
>     fprintf(stderr, "%s: failed to read mountpoints: %s\n",
>             self, strerror(errno));
>     close(host_proc);
>     return 7;
>   }
>   close(host_proc);
>
>   /* Sort with Mount_cmp_dst to get a sane unmount order. */
>   qsort(table, n_mounts, sizeof(*table), &Mount_cmp_dst);
>
>   for (idx = 0; idx < n_mounts; ++idx) {
>     Mount *cur = &table[idx];
>
>     if (umount2(cur->dst, 0) != 0) {
>       /* Save errno right away, before any further library call. */
>       int err = errno;
>
>       if (Mount_should_error(cur)) {
>         fprintf(stderr, "%s: failed to unmount %s: %s\n",
>                 self, cur->dst, strerror(err));
>       }
>     }
>     Mount_free(cur);
>   }
>   free(table);
>
>   return 0;
> }
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel
More information about the lxc-devel
mailing list