[Lxc-users] [PATCH 4/9] pin a container's rootfs
Daniel Lezcano
daniel.lezcano at free.fr
Sun May 27 02:26:22 UTC 2012
On 04/26/2012 07:09 AM, Serge Hallyn wrote:
> From: Serge Hallyn<serge.hallyn at ubuntu.com>
>
> If /var/lib/lxc is a separate filesystem, and you start and stop only
> a single container which has it's rootfs at /var/lib/lxc/c1/rootfs,
> then /var/lib/lxc will be re-mounted readonly when the container, at
> shutdown, does 'mount -o remount,ro /'. (Precise hosts actually
> now prevent this using apparmor, but others do not)
>
> The reason this doesn't normally happen is that the container's
> remount attempt fails because the fs is busy. We can force the fs
> to be busy by holding a file open on the fs. So, when starting a
> container, open a file called /var/lib/lxc/c1/rootfs.pin, and keep
> it open until the container is shut down.
>
> Note that Guido had some good feedback on this patch, but I've not had
> the time to consider implementing them.
>
> Changelog: Apr 25: Don't fail if the container doesn't have a
> specified rootfs.
>
> Signed-off-by: Serge Hallyn<serge.hallyn at ubuntu.com>
> ---
> src/lxc/conf.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> src/lxc/conf.h | 2 ++
> src/lxc/start.c | 16 ++++++++++++++++
> 3 files changed, 67 insertions(+)
>
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index e8088bb..b0ce92b 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -452,6 +452,55 @@ static int mount_rootfs_block(const char *rootfs, const char *target)
> return mount_unknow_fs(rootfs, target, 0);
> }
>
> +/*
> + * pin_rootfs
> + * if rootfs is a directory, then open ${rootfs}.hold for writing for the
> + * duration of the container run, to prevent the container from marking the
> + * underlying fs readonly on shutdown.
> + * return -1 on error.
> + * return -2 if nothing needed to be pinned.
> + * return an open fd (>=0) if we pinned it.
> + */
> +int pin_rootfs(const char *rootfs)
> +{
> + char absrootfs[MAXPATHLEN];
> + char absrootfspin[MAXPATHLEN];
> + struct stat s;
> + int ret, fd;
> +
> + /* it's possible to not specify a rootfs, don't make that fail */
> + if (rootfs == NULL || strlen(rootfs) == 0)
> + return 0;
> +
> + if (!realpath(rootfs, absrootfs)) {
> + SYSERROR("failed to get real path for '%s'", rootfs);
> + return -1;
> + }
> +
> + if (access(absrootfs, F_OK)) {
> + SYSERROR("'%s' is not accessible", absrootfs);
> + return -1;
> + }
> +
> + if (stat(absrootfs,&s)) {
> + SYSERROR("failed to stat '%s'", absrootfs);
> + return -1;
> + }
> +
> + if (!__S_ISTYPE(s.st_mode, S_IFDIR))
> + return -2;
I think you can get rid of all these checks if the function is invoked
from the right place.
> +
> + ret = snprintf(absrootfspin, MAXPATHLEN, "%s%s", absrootfs, ".hold");
> + if (ret>= MAXPATHLEN) {
> + SYSERROR("pathname too long for rootfs hold file");
> + return -1;
> + }
Why create an intermediate file instead of opening the rootfs directory
directly?
> +
> + fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
> + INFO("opened %s as fd %d\n", absrootfspin, fd);
> + return fd;
> +}
If I understand correctly, you open the directory, let the file
descriptor be inherited by the container, and then close the file
descriptor, right?
> +
> static int mount_rootfs(const char *rootfs, const char *target)
> {
> char absrootfs[MAXPATHLEN];
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index 09f55cb..0d8f28e 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -223,6 +223,8 @@ struct lxc_conf {
> */
> extern struct lxc_conf *lxc_conf_init(void);
>
> +extern int pin_rootfs(const char *rootfs);
> +
> extern int lxc_create_network(struct lxc_handler *handler);
> extern void lxc_delete_network(struct lxc_list *networks);
> extern int lxc_assign_network(struct lxc_list *networks, pid_t pid);
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 7af1e37..96ddd5b 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -534,6 +534,7 @@ int lxc_spawn(struct lxc_handler *handler)
> int clone_flags;
> int failed_before_rename = 0;
> const char *name = handler->name;
> + int pinfd;
>
> if (lxc_sync_init(handler))
> return -1;
> @@ -563,6 +564,17 @@ int lxc_spawn(struct lxc_handler *handler)
> }
> }
>
> + /*
> + * if the rootfs is not a blockdev, prevent the container from
> + * marking it readonly.
> + */
> +
> + pinfd = pin_rootfs(handler->conf->rootfs.path);
> + if (pinfd == -1) {
> + ERROR("failed to pin the container's rootfs");
> + goto out_abort;
> + }
> +
It is not the right place to do that. That should be done from the
setup_rootfs function in conf.c or in the mount_rootfs_dir function.
> /* Create a process in a new set of namespaces */
> handler->pid = lxc_clone(do_start, handler, clone_flags);
> if (handler->pid< 0) {
> @@ -605,6 +617,10 @@ int lxc_spawn(struct lxc_handler *handler)
> }
>
> lxc_sync_fini(handler);
> +
> + if (pinfd>= 0)
> + close(pinfd);
> +
> return 0;
>
> out_delete_net:
More information about the lxc-users
mailing list