[Lxc-users] [PATCH 4/9] pin a container's rootfs

Daniel Lezcano daniel.lezcano at free.fr
Sun May 27 02:26:22 UTC 2012


On 04/26/2012 07:09 AM, Serge Hallyn wrote:
> From: Serge Hallyn<serge.hallyn at ubuntu.com>
>
> If /var/lib/lxc is a separate filesystem, and you start and stop only
> a single container which has its rootfs at /var/lib/lxc/c1/rootfs,
> then /var/lib/lxc will be re-mounted readonly when the container, at
> shutdown, does 'mount -o remount,ro /'.  (Precise hosts actually
> now prevent this using apparmor, but others do not)
>
> The reason this doesn't normally happen is that the container's
> remount attempt fails because the fs is busy.  We can force the fs
> to be busy by holding a file open on the fs.  So, when starting a
> container, open a file called /var/lib/lxc/c1/rootfs.pin, and keep
> it open until the container is shut down.
>
> Note that Guido had some good feedback on this patch, but I've not had
> the time to consider implementing them.
>
> Changelog: Apr 25: Don't fail if the container doesn't have a
> specified rootfs.
>
> Signed-off-by: Serge Hallyn<serge.hallyn at ubuntu.com>
> ---
>   src/lxc/conf.c  |   49 +++++++++++++++++++++++++++++++++++++++++++++++++
>   src/lxc/conf.h  |    2 ++
>   src/lxc/start.c |   16 ++++++++++++++++
>   3 files changed, 67 insertions(+)
>
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index e8088bb..b0ce92b 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -452,6 +452,55 @@ static int mount_rootfs_block(const char *rootfs, const char *target)
>   	return mount_unknow_fs(rootfs, target, 0);
>   }
>
> +/*
> + * pin_rootfs
> + * if rootfs is a directory, then open ${rootfs}.hold for writing for the
> + * duration of the container run, to prevent the container from marking the
> + * underlying fs readonly on shutdown.
> + * return -1 on error.
> + * return -2 if nothing needed to be pinned.
> + * return an open fd (>=0) if we pinned it.
> + */
> +int pin_rootfs(const char *rootfs)
> +{
> +	char absrootfs[MAXPATHLEN];
> +	char absrootfspin[MAXPATHLEN];
> +	struct stat s;
> +	int ret, fd;
> +
> +	/* it's possible to not specify a rootfs, don't make that fail */
> +	if (rootfs == NULL || strlen(rootfs) == 0)
> +		return 0;
> +
> +	if (!realpath(rootfs, absrootfs)) {
> +		SYSERROR("failed to get real path for '%s'", rootfs);
> +		return -1;
> +	}
> +
> +	if (access(absrootfs, F_OK)) {
> +		SYSERROR("'%s' is not accessible", absrootfs);
> +		return -1;
> +	}
> +
> +	if (stat(absrootfs,&s)) {
> +		SYSERROR("failed to stat '%s'", absrootfs);
> +		return -1;
> +	}
> +
> +	if (!__S_ISTYPE(s.st_mode, S_IFDIR))
> +		return -2;

I think you can get rid of all these checks if the function is invoked
from the right place.

> +
> +	ret = snprintf(absrootfspin, MAXPATHLEN, "%s%s", absrootfs, ".hold");
> +	if (ret>= MAXPATHLEN) {
> +		SYSERROR("pathname too long for rootfs hold file");
> +		return -1;
> +	}

Why create an intermediate directory and not open the rootfs directory 
directly ?

> +
> +	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
> +	INFO("opened %s as fd %d\n", absrootfspin, fd);
> +	return fd;
> +}

If I understand correctly, you open the directory, let the file
descriptor be inherited by the container, and then close the file
descriptor, right?

> +
>   static int mount_rootfs(const char *rootfs, const char *target)
>   {
>   	char absrootfs[MAXPATHLEN];
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index 09f55cb..0d8f28e 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -223,6 +223,8 @@ struct lxc_conf {
>    */
>   extern struct lxc_conf *lxc_conf_init(void);
>
> +extern int pin_rootfs(const char *rootfs);
> +
>   extern int lxc_create_network(struct lxc_handler *handler);
>   extern void lxc_delete_network(struct lxc_list *networks);
>   extern int lxc_assign_network(struct lxc_list *networks, pid_t pid);
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 7af1e37..96ddd5b 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -534,6 +534,7 @@ int lxc_spawn(struct lxc_handler *handler)
>   	int clone_flags;
>   	int failed_before_rename = 0;
>   	const char *name = handler->name;
> +	int pinfd;
>
>   	if (lxc_sync_init(handler))
>   		return -1;
> @@ -563,6 +564,17 @@ int lxc_spawn(struct lxc_handler *handler)
>   		}
>   	}
>
> +	/*
> +	 * if the rootfs is not a blockdev, prevent the container from
> +	 * marking it readonly.
> +	 */
> +
> +	pinfd = pin_rootfs(handler->conf->rootfs.path);
> +	if (pinfd == -1) {
> +		ERROR("failed to pin the container's rootfs");
> +		goto out_abort;
> +	}
> +

It is not the right place to do that. That should be done from the 
setup_rootfs function in conf.c or in the mount_rootfs_dir function.

>   	/* Create a process in a new set of namespaces */
>   	handler->pid = lxc_clone(do_start, handler, clone_flags);
>   	if (handler->pid<  0) {
> @@ -605,6 +617,10 @@ int lxc_spawn(struct lxc_handler *handler)
>   	}
>
>   	lxc_sync_fini(handler);
> +
> +	if (pinfd>= 0)
> +		close(pinfd);
> +
>   	return 0;
>
>   out_delete_net:





More information about the lxc-users mailing list