[lxc-devel] [PATCH 1/2] autodev: switch strategies (v2)

Stéphane Graber stgraber at ubuntu.com
Mon Jan 12 21:31:52 UTC 2015


On Fri, Jan 09, 2015 at 09:38:18PM +0000, Serge Hallyn wrote:
> Do not keep container devs under /dev/.lxc.  Instead, always
> keep them in a small tmpfs mounted at $(mounted_root)/dev.
> 
> The tmpfs is mounted in the container monitor's namespace.  This
> means that at every reboot it will get re-created.  It seems to
> me this better replicates what happens on a real host.
> 
> If we want devices persisting across reboots, then perhaps we can
> implement a $lxcpath/$name/keepdev directory containing devices to
> bind into the container at each startup.
> 
> Changelog (v2): don't bother with the $lxcpath/$name/rootfs.dev
> 	directory, just mount the tmpfs straight into the container.
> 
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
> ---
>  src/lxc/conf.c  | 318 ++++++--------------------------------------------------
>  src/lxc/start.c |   1 -
>  2 files changed, 31 insertions(+), 288 deletions(-)
> 
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 72181dd..dad79ab 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -94,10 +94,7 @@
>  
>  lxc_log_define(lxc_conf, lxc);
>  
> -#define MAXHWLEN    18
> -#define MAXINDEXLEN 20
> -#define MAXMTULEN   16
> -#define MAXLINELEN  128
> +#define LINELEN 4096
>  
>  #if HAVE_SYS_CAPABILITY_H
>  #ifndef CAP_SETFCAP
> @@ -295,9 +292,6 @@ static struct caps_opt caps_opt[] = {
>  static struct caps_opt caps_opt[] = {};
>  #endif
>  
> -const char *dev_base_path = "/dev/.lxc";
> -const char *dev_user_path = "/dev/.lxc/user";
> -
>  static int run_buffer(char *buffer)
>  {
>  	struct lxc_popen_FILE *f;
> @@ -1092,247 +1086,55 @@ fail:
>  }
>  
>  /*
> - * Check to see if a directory has something mounted on it and,
> - * if it does, return the fstype.
> - *
> - * Code largely based on detect_shared_rootfs below
> - *
> - * Returns: # of matching entries in /proc/self/mounts
> - * 	if != 0 fstype is filled with the last filesystem value.
> - * 	if == 0 no matches found, fstype unchanged.
> - *
> - * ToDo: Maybe return the mount options in another parameter...
> + * Just create a path for /dev under $lxcpath/$name and in rootfs
> + * If we hit an error, log it but don't fail yet.
>   */
> -
> -#define LINELEN 4096
> -#define MAX_FSTYPE_LEN 128
> -static int mount_check_fs( const char *dir, char *fstype )
> +static void create_devdir(const char *path)
>  {
> -	char buf[LINELEN], *p;
> -	struct stat s;
> -	FILE *f;
> -	int found_fs = 0;
> -	char *p2;
> -
> -	DEBUG("entering mount_check_fs for %s", dir);
> -
> -	if ( 0 != access(dir, F_OK) || 0 != stat(dir, &s) || 0 == S_ISDIR(s.st_mode) ) {
> -		return 0;
> -	}
> -
> -	f = fopen("/proc/self/mounts", "r");
> -	if (!f)
> -		return 0;
> -	while (fgets(buf, LINELEN, f)) {
> -		p = index(buf, ' ');
> -		if( !p )
> -			continue;
> -		*p = '\0';
> -		p2 = p + 1;
> -
> -		p = index(p2, ' ');
> -		if( !p )
> -			continue;
> -		*p = '\0';
> -
> -		/* Compare the directory in the entry to desired */
> -		if( strcmp( p2, dir ) ) {
> -			continue;
> -		}
> -
> -		p2 = p + 1;
> -		p = index( p2, ' ');
> -		if( !p )
> -			continue;
> -		*p = '\0';
> -
> -		++found_fs;
> -
> -		if( fstype ) {
> -			strncpy( fstype, p2, MAX_FSTYPE_LEN - 1 );
> -			fstype [ MAX_FSTYPE_LEN - 1 ] = '\0';
> -		}
> -	}
> -
> -	fclose(f);
> -
> -	DEBUG("mount_check_fs returning %d last %s", found_fs, fstype);
> -
> -	return found_fs;
> -}
> -
> -/*
> - * Locate a devtmpfs mount (should be on /dev) and create a container
> - * subdirectory on it which we can then bind mount to the container
> - * /dev instead of mounting a tmpfs there.
> - * If we fail, return NULL.
> - * Else return the pointer to the name buffer with the string to
> - * the devtmpfs subdirectory.
> - */
> -
> -static char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
> -{
> -	int ret;
> -	struct stat s;
> -	char tmp_path[MAXPATHLEN];
> -	char fstype[MAX_FSTYPE_LEN];
> -	uint64_t hash;
> -
> -	if ( 0 != access(dev_base_path, F_OK) || 0 != stat(dev_base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
> -		/* This is just making /dev/.lxc it better work or we're done */
> -		ret = mkdir(dev_base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
> -		if ( ret ) {
> -			SYSERROR( "Unable to create /dev/.lxc for autodev" );
> -			return NULL;
> -		}
> -	}
> -
> -	/*
> -	 * Programmers notes:
> -	 * 	We can not do mounts in this area of code that we want
> -	 * 	to be visible in the host.  Consequently, /dev/.lxc must
> -	 * 	be set up earlier if we need a tmpfs mounted there.
> -	 * 	That only affects the rare cases where autodev is enabled
> -	 * 	for a container and devtmpfs is not mounted on /dev in the
> -	 * 	host.  In that case, we'll fall back to the old method
> -	 * 	of mounting a tmpfs in the container and have no visibility
> -	 * 	into the container /dev.
> -	 */
> -	if( ! mount_check_fs( "/dev", fstype )
> -		|| strcmp( "devtmpfs", fstype ) ) {
> -		/* Either /dev was not mounted or was not devtmpfs */
> -
> -		if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
> -			/*
> -			 * /dev/.lxc is not already mounted
> -			 * Doing a mount here does no good, since
> -			 * it's not visible in the host.
> -			 */
> -
> -			ERROR("/dev/.lxc is not setup - taking fallback" );
> -			return NULL;
> -		}
> -	}
> -
> -	if ( 0 != access(dev_user_path, F_OK) || 0 != stat(dev_user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
> -		/*
> -		 * This is making /dev/.lxc/user path for non-priv users.
> -		 * If this doesn't work, we'll have to fall back in the
> -		 * case of non-priv users.  It's mode 1777 like /tmp.
> -		 */
> -		ret = mkdir(dev_user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
> -		if ( ret ) {
> -			/* Issue an error but don't fail yet! */
> -			ERROR("Unable to create /dev/.lxc/user");
> -		}
> -		/* Umask tends to screw us up here */
> -		chmod(dev_user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
> -	}
> -
> -	/*
> -	 * Since the container name must be unique within a given
> -	 * lxcpath, we're going to use a hash of the path
> -	 * /lxcpath/name as our hash name in /dev/.lxc/
> -	 */
> -
> -	ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
> -	if (ret < 0 || ret >= MAXPATHLEN)
> -		return NULL;
> -
> -	hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
> -
> -	ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_base_path, name, hash);
> -	if (ret < 0 || ret >= MAXPATHLEN)
> -		return NULL;
> -
> -	if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
> -		ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
> -		if ( ret ) {
> -			/* Something must have failed with the dev_base_path...
> -			 * Maybe unpriv user.  Try dev_user_path now... */
> -			INFO("Setup in /dev/.lxc failed.  Trying /dev/.lxc/user." );
> -
> -			ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_user_path, name, hash);
> -			if (ret < 0 || ret >= MAXPATHLEN)
> -				return NULL;
> -
> -			if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
> -				ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
> -				if ( ret ) {
> -					ERROR("Container /dev setup in host /dev failed - taking fallback" );
> -					return NULL;
> -				}
> -			}
> -		}
> -	}
> -
> -	strcpy( path, tmp_path );
> -	return path;
> +	int ret = mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
> +	if (ret) /* Issue an error but don't fail yet! */
> +		SYSERROR("Unable to create devpath %s", path);
> +	ret = chmod(path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
> +	if (ret)
> +		SYSERROR("Failed to chown devpath %s", path);
> +	INFO("Created %s", path);
>  }
>  
> -/*
> - * Do we want to add options for max size of /dev and a file to
> - * specify which devices to create?
> - */
>  static int mount_autodev(const char *name, char *root, const char *lxcpath)
>  {
>  	int ret;
> -	struct stat s;
> -	char path[MAXPATHLEN];
> -	char host_path[MAXPATHLEN];
> -	char devtmpfs_path[MAXPATHLEN];
> +	size_t clen;
> +	char *path;
>  
>  	INFO("Mounting /dev under %s", root);
>  
> -	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
> -	if (ret < 0 || ret > MAXPATHLEN)
> -		return -1;
> +	/* $(root) + "/dev/pts" + '\0' */
> +	clen = strlen(root) + 9;
> +	path = alloca(clen);
>  
> -	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
> -	if (ret < 0 || ret > MAXPATHLEN)
> +	ret = snprintf(path, clen, "%s/dev", root);
> +	if (ret < 0 || ret >= clen)
>  		return -1;
>  
> -	if (mk_devtmpfs( name, devtmpfs_path, lxcpath ) ) {
> -		/*
> -		 * Get rid of old links and directoriess
> -		 * This could be either a symlink and we remove it,
> -		 * or an empty directory and we remove it,
> -		 * or non-existent and we don't care,
> -		 * or a non-empty directory, and we will then emit an error
> -		 * but we will not fail out the process.
> -		 */
> -		unlink( host_path );
> -		rmdir( host_path );
> -		ret = symlink(devtmpfs_path, host_path);
> -
> -		if ( ret < 0 ) {
> -			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'", host_path, devtmpfs_path);
> -		}
> -		DEBUG("Bind mounting %s to %s", devtmpfs_path , path );
> -		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0 );
> -	} else {
> -		/* Only mount a tmpfs on here if we don't already a mount */
> -		if ( ! mount_check_fs( host_path, NULL ) ) {
> -			DEBUG("Mounting tmpfs to %s", host_path );
> -			ret = mount("none", path, "tmpfs", 0, "size=100000,mode=755");
> -		} else {
> -			/* This allows someone to manually set up a mount */
> -			DEBUG("Bind mounting %s to %s", host_path, path );
> -			ret = mount(host_path , path, NULL, MS_BIND, 0 );
> -		}
> +	/* Create $(mounted_rootfs/dev), and * mount a small tmpfs onto it */
> +	if (!dir_exists(path))
> +		create_devdir(path);

So I'm not sure how others feel but I usually try to have LXC do as
little changes to the rootfs as possible. Here, while I agree that /dev
pretty much has to exist for any Linux system to make any kind of sense,
I think I'd still prefer that we log a WARNING and skip autodev setup
entirely in that case (so starting the container without /dev).

> +	if (0 != mount("none", path, "tmpfs", 0, "size=100000,mode=755")) {

Nitpicking but can we be a bit consistent and have those tests the other
way around? (mount... != 0)

> +		SYSERROR("Failed mounting tmpfs onto %s\n", path);
> +		return false;
>  	}
> -	if (ret) {
> -		SYSERROR("Failed to mount /dev at %s", root);
> -		return -1;
> -	}
> -	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
> -	if (ret < 0 || ret >= MAXPATHLEN)
> +
> +	INFO("Mounted tmpfs onto %s",  path);
> +
> +	ret = snprintf(path, clen, "%s/dev/pts", root);
> +	if (ret < 0 || ret >= clen)
>  		return -1;
> +
>  	/*
>  	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
>  	 * If not, then create it and exit if that fails...
>  	 */
> -	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) || 0 == S_ISDIR(s.st_mode) ) {
> +	if (!dir_exists(path)) {
>  		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
>  		if (ret) {
>  			SYSERROR("Failed to create /dev/pts in container");
> @@ -1395,64 +1197,6 @@ static int setup_autodev(const char *root)
>  	return 0;
>  }
>  
> -/*
> - * Locate allocated devtmpfs mount and purge it.
> - * path lookup mostly taken from mk_devtmpfs
> - */
> -int lxc_delete_autodev(struct lxc_handler *handler)
> -{
> -	int ret;
> -	struct stat s;
> -	struct lxc_conf *lxc_conf = handler->conf;
> -	const char *name = handler->name;
> -	const char *lxcpath = handler->lxcpath;
> -	char tmp_path[MAXPATHLEN];
> -	uint64_t hash;
> -
> -	if ( lxc_conf->autodev <= 0 )
> -		return 0;
> -
> -	/* don't clean on reboot */
> -	if ( lxc_conf->reboot == 1 )
> -		return 0;
> -
> -	/*
> -	 * Use the same logic as mk_devtmpfs to compute candidate
> -	 * path for cleanup.
> -	 */
> -
> -	ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
> -	if (ret < 0 || ret >= MAXPATHLEN)
> -		return -1;
> -
> -	hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
> -
> -	/* Probe /dev/.lxc/<container name>.<hash> */
> -	ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_base_path, name, hash);
> -	if (ret < 0 || ret >= MAXPATHLEN)
> -		return -1;
> -
> -	if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
> -		/* Probe /dev/.lxc/user/<container name>.<hash> */
> -		ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_user_path, name, hash);
> -		if (ret < 0 || ret >= MAXPATHLEN)
> -			return -1;
> -
> -		if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
> -			WARN("Failed to locate autodev /dev/.lxc and /dev/.lxc/user." );
> -			return -1;
> -		}
> -	}
> -
> -	/* Do the cleanup */
> -	INFO("Cleaning %s", tmp_path );
> -	if ( 0 != lxc_rmdir_onedev(tmp_path, NULL) ) {
> -		ERROR("Failed to cleanup autodev" );
> -	}
> -
> -	return 0;
> -}
> -
>  static int setup_rootfs(struct lxc_conf *conf)
>  {
>  	const struct lxc_rootfs *rootfs = &conf->rootfs;
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index cd78665..98905a3 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -477,7 +477,6 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
>  
>  	lxc_console_delete(&handler->conf->console);
>  	lxc_delete_tty(&handler->conf->tty_info);
> -	lxc_delete_autodev(handler);
>  	close(handler->conf->maincmd_fd);
>  	handler->conf->maincmd_fd = -1;
>  	free(handler->name);
> -- 
> 2.1.0
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20150112/ce891fbc/attachment.sig>


More information about the lxc-devel mailing list