[lxc-devel] [RFC 0/5] stop hook with namespace access

Serge Hallyn serge.hallyn at ubuntu.com
Mon Sep 28 15:14:20 UTC 2015


Quoting Wolfgang Bumiller (w.bumiller at proxmox.com):
> Just a quick followup:

What about actually shipping this in /usr/share/lxc/hooks/
from lxc/hooks?

> I thought I'd drop a stop hook example here and a reason for why it's
> useful.
> 
> As I mentioned, it's mostly about unmounting NFS mountpoints. However,
> you could say that that's not usually an issue as the container
> usually performs a (hopefully) clean shutdown sequence which should
> unmount the filesystem anyway (or maybe just read-only remount it).
> And yes, most of the time when a container shuts down its shutdown
> sequence's unmount/sync commands would hang anyway, so why bother with
> the stop hook?
> 
> Two examples: 1) Some systems seem to default to force-unmounting, and
> the seccomp policy forbids this to avoid shooting lxcfs in the back.
> E.g. on a standard centos template I seem to have *always* lost my
> lxcfs on shutdown of a container until I started using the seccomp
> policy to forbid force-unmounting. The side effect of this is that the
> unmount call EPERMs and no unmounting is performed. This is where
> network delays in NFS can become an issue for us.
> 
> 2) Assume a system running SystemD as init. Now send a SIGSEGV to pid
> 1 (yes, you're actually allowed to do that because it installs a
> SIGSEGV handler, and according to kill(2) you can send signals to pid
> 1 provided such a signal handler was actually installed). SystemD
> handles a first such signal by freezing itself with a nice log entry
> about this - I assume this is to not kill the whole system while
> giving you a chance to "deal". Sending yet another SIGSEGV finally
> kills it off.
> So do another `kill -11 1` and the system's gone without doing any
> kind of shutdown sequence.
> 
> I have attached a stop-hook.c that's supposed to unmount all
> filesystems inside the container after shutdown.

> /* vim: set ts=2 sts=2 sw=2 et: */
> #define _GNU_SOURCE    /* setns */
> #include <stdio.h>     /* fdopen, getmntent, endmntent */
> #include <stdlib.h>    /* malloc, qsort */
> #include <unistd.h>    /* close */
> #include <string.h>    /* strcmp, strncmp, strdup, strerror */
> #include <sched.h>     /* setns */
> #include <sys/mount.h> /* mount, umount2 */
> #include <sys/types.h> /* openat, open */
> #include <sys/stat.h>  /* openat, open */
> #include <fcntl.h>     /* openat, open */
> #include <mntent.h>    /* getmntent, endmntent */
> #include <errno.h>     /* errno */
> 
> typedef struct { /* one mount-table entry; the strings are released by Mount_free() */
>   char *src; /* mount source (copied from mnt_fsname); stored but not otherwise used */
>   char *dst; /* mount point (copied from mnt_dir); the path that gets sorted and unmounted */
>   char *fs; /* filesystem type (copied from mnt_type); stored but not otherwise used */
> } Mount;
> 
> /* Release the three strings owned by a Mount entry. The Mount structure
>  * itself is not freed; that is left to whoever owns the enclosing array.
>  */
> static void Mount_free(Mount *mnt) {
>   char *owned[] = { mnt->src, mnt->dst, mnt->fs };
>   size_t i;
> 
>   for (i = 0; i < sizeof(owned) / sizeof(owned[0]); ++i)
>     free(owned[i]);
> }
> 
> /* qsort(3) comparator that orders Mount entries by mount point in
>  * descending strcmp() order. A parent path is a prefix of every path below
>  * it and therefore compares lower, so the reversed order places nested
>  * mount points ahead of the directories that contain them.
>  */
> static int Mount_cmp_dst(const void *a_, const void *b_) {
>   const Mount *a = a_; /* keep the const qualifier qsort hands us */
>   const Mount *b = b_;
>   return strcmp(b->dst, a->dst); /* operands swapped: descending order */
> }
> 
> /* Unmounting /dev/pts fails, and  so /dev also fails, but /dev is not what
>  * we're interested in.
>  */
> static int Mount_should_error(const Mount *mnt) {
>   const char *dst = mnt->dst;
>   return !(strncmp(dst, "/dev", 4) == 0 && (dst[4] == 0 || dst[4] == '/'));
> }
> 
> /* Read mounts from 'self/mounts' relative to a directory file descriptor.
>  * Before entering the container we open a handle to /proc on the host as we
>  * need to access /proc/self/mounts and the container's /proc doesn't contain
>  * our /self. We then use openat(2) to avoid having to mount a temporary /proc.
>  *
>  * procfd: directory file descriptor that 'self/mounts' is resolved against.
>  * mp:     receives a newly allocated array of entries, or NULL on failure.
>  * countp: receives the number of valid entries in *mp, or 0 on failure.
>  *
>  * Returns 1 on success and 0 on failure with errno describing the error.
>  * On success the caller owns the result: call Mount_free() on every entry
>  * and then free() the array. On failure nothing is left allocated.
>  */
> static int read_mounts(int procfd, Mount **mp, size_t *countp) {
>   int fd;
>   int error;
>   struct mntent *ent;
>   FILE *mf;
>   size_t capacity = 32;
>   size_t count = 0;
>   Mount *mounts;
> 
>   *mp = NULL;
>   *countp = 0;
> 
>   fd = openat(procfd, "self/mounts", O_RDONLY);
>   if (fd < 0)
>     return 0;
> 
>   mf = fdopen(fd, "r");
>   if (!mf) {
>     error = errno; /* close() may overwrite errno */
>     close(fd);
>     errno = error;
>     return 0;
>   }
> 
>   /* Allocate only once the file is open, so the early returns above have
>    * nothing to clean up.
>    */
>   mounts = malloc(capacity * sizeof(*mounts));
>   if (!mounts)
>     goto out_of_memory;
> 
>   while ((ent = getmntent(mf))) {
>     Mount *slot;
> 
>     if (count == capacity) {
>       /* Grow through a temporary so the old array stays reachable (and can
>        * be freed) if realloc() fails.
>        */
>       Mount *grown = realloc(mounts, 2 * capacity * sizeof(*mounts));
>       if (!grown)
>         goto out_of_memory;
>       mounts = grown;
>       capacity *= 2;
>     }
> 
>     slot = &mounts[count];
>     slot->src = strdup(ent->mnt_fsname);
>     slot->dst = strdup(ent->mnt_dir);
>     slot->fs  = strdup(ent->mnt_type);
>     if (!slot->src || !slot->dst || !slot->fs) {
>       /* Drop the half-filled entry; it is not included in 'count' yet. */
>       Mount_free(slot);
>       goto out_of_memory;
>     }
>     ++count;
>   }
>   endmntent(mf);
> 
>   *mp = mounts;
>   *countp = count;
> 
>   return 1;
> 
> out_of_memory:
>   /* 'mounts' is NULL here only if the initial malloc() failed, in which
>    * case 'count' is 0 and free(NULL) is a no-op.
>    */
>   while (count > 0)
>     Mount_free(&mounts[--count]);
>   free(mounts);
>   endmntent(mf);
>   errno = ENOMEM; /* set last so the cleanup calls cannot clobber it */
>   return 0;
> }
> 
> /* LXC hook entry point. Expects the hook section in argv[2] to be "lxc" and
>  * the hook type in argv[3]; any hook type other than "stop" is a successful
>  * no-op. The mount namespace to enter is taken from an argument of the form
>  * "mnt:<path>" found at argv[4] or later.
>  *
>  * Exit status: 0 on success (individual unmount failures are only logged),
>  * 2 usage error, 3 no mount namespace given, 4 cannot open /proc,
>  * 5 cannot open the namespace, 6 setns() failed, 7 cannot read the mounts.
>  */
> int main(int argc, char **argv) {
>   const char *ns_path = NULL;
>   Mount *table;
>   size_t n_mounts = 0;
>   size_t idx;
>   int arg;
>   int host_proc;
>   int ns_fd;
> 
>   if (argc < 4 || strcmp(argv[2], "lxc") != 0) {
>     fprintf(stderr, "%s: usage error, expected LXC hook arguments\n", argv[0]);
>     return 2;
>   }
> 
>   /* Only the stop hook is handled here. */
>   if (strcmp(argv[3], "stop") != 0)
>     return 0;
> 
>   /* Scan the remaining arguments; if several "mnt:" entries are present the
>    * last one is used.
>    */
>   for (arg = 4; arg < argc; ++arg) {
>     if (strncmp(argv[arg], "mnt:", 4) == 0)
>       ns_path = argv[arg] + 4;
>   }
> 
>   if (ns_path == NULL) {
>     fprintf(stderr, "%s: no mount namespace provided\n", argv[0]);
>     return 3;
>   }
> 
>   /* Grab the host's /proc before switching namespaces: we need
>    * /proc/self/mounts afterwards and the container's /proc doesn't contain
>    * our /self. See read_mounts().
>    */
>   host_proc = open("/proc", O_RDONLY | O_DIRECTORY | O_PATH);
>   if (host_proc < 0) {
>     fprintf(stderr, "%s: failed to open /proc: %s\n", argv[0], strerror(errno));
>     return 4;
>   }
> 
>   /* Open the container's mount namespace... */
>   ns_fd = open(ns_path, O_RDONLY);
>   if (ns_fd < 0) {
>     fprintf(stderr, "%s: failed to open mount namespace: %s\n",
>             argv[0], strerror(errno));
>     close(host_proc);
>     return 5;
>   }
> 
>   /* ...and enter it. */
>   if (setns(ns_fd, CLONE_NEWNS) != 0) {
>     fprintf(stderr, "%s: failed to attach to namespace: %s\n",
>             argv[0], strerror(errno));
>     close(ns_fd);
>     close(host_proc);
>     return 6;
>   }
>   close(ns_fd);
> 
>   /* Read self/mounts through the /proc handle opened earlier. */
>   if (!read_mounts(host_proc, &table, &n_mounts)) {
>     fprintf(stderr, "%s: failed to read mountpoints: %s\n",
>             argv[0], strerror(errno));
>     close(host_proc);
>     return 7;
>   }
>   close(host_proc);
> 
>   /* Sort so that we get a sane unmount order. */
>   qsort(table, n_mounts, sizeof(*table), &Mount_cmp_dst);
> 
>   for (idx = 0; idx < n_mounts; ++idx) {
>     Mount *cur = &table[idx];
> 
>     if (umount2(cur->dst, 0) != 0) {
>       int saved_errno = errno;
>       /* Failures are reported but never abort the loop. */
>       if (Mount_should_error(cur)) {
>         fprintf(stderr, "%s: failed to unmount %s: %s\n",
>                 argv[0], cur->dst, strerror(saved_errno));
>       }
>     }
>     Mount_free(cur);
>   }
>   free(table);
> 
>   return 0;
> }

> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel



More information about the lxc-devel mailing list