[lxc-devel] [PATCH 1/1] pivot_root: switch to a new mechanism (v2)

Dwight Engen dwight.engen at oracle.com
Fri Sep 26 22:03:42 UTC 2014


On Sat, 20 Sep 2014 03:15:44 +0000
Serge Hallyn <serge.hallyn at ubuntu.com> wrote:

> This idea came from Andy Lutomirski.  Instead of using a
> temporary directory for the pivot_root put-old, use "." both
> for new-root and old-root.  Then fchdir into the old root
> temporarily in order to unmount the old-root, and finally
> chdir back into our '/'.
> 
> Drop lxc.pivotdir from the lxc.container.conf manpage.
> 
> Warn when we see a lxc.pivotdir entry (but keep it in the
> lxc.conf for now).
> 
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>

Hey Serge

Just a heads up that this change makes my containers fail to start on a
3.8.13 kernel. Reverting this change or using a 3.13.11 kernel (I just
tried a newer one I had handy) works. I'll try to debug further.
Here is the log from trying to start a busybox container:

      lxc-start 1411768079.034 INFO     lxc_start_ui - using rcfile /container/bb01/config
      lxc-start 1411768079.035 WARN     lxc_log - lxc_log_init called with log already initialized
      lxc-start 1411768079.040 DEBUG    lxc_conf - allocated pty '/dev/pts/0' (5/6)
      lxc-start 1411768079.040 INFO     lxc_conf - tty's configured
      lxc-start 1411768079.040 DEBUG    lxc_start - sigchild handler set
      lxc-start 1411768079.040 DEBUG    lxc_console - opening /dev/tty for console peer
      lxc-start 1411768079.040 DEBUG    lxc_console - using '/dev/tty' as console
      lxc-start 1411768079.041 DEBUG    lxc_console - 6557 got SIGWINCH fd 11
      lxc-start 1411768079.041 DEBUG    lxc_console - set winsz dstfd:8 cols:128 rows:48
      lxc-start 1411768079.041 INFO     lxc_start - 'bb01' is initialized
      lxc-start 1411768079.042 DEBUG    lxc_start - Not dropping cap_sys_boot or watching utmp
      lxc-start 1411768079.069 DEBUG    lxc_conf - instanciated veth 'vethU2J1VP/veth8FP0C5', index is '6'
      lxc-start 1411768079.069 INFO     lxc_cgroup - cgroup driver cgroupfs initing for bb01
      lxc-start 1411768079.119 DEBUG    lxc_conf - move '(null)' to '6569'
      lxc-start 1411768079.124 DEBUG    lxc_conf - mounted '/container/bb01/rootfs' on '/usr/lib64/lxc/rootfs'
      lxc-start 1411768079.124 INFO     lxc_conf - 'bb01' hostname has been setup
      lxc-start 1411768079.125 DEBUG    lxc_conf - mac address '00:16:3e:17:fb:50' on 'eth0' has been setup
      lxc-start 1411768079.130 DEBUG    lxc_conf - 'eth0' has been setup
      lxc-start 1411768079.130 INFO     lxc_conf - network has been setup
      lxc-start 1411768079.130 DEBUG    lxc_conf - Set exec command to /sbin/init
      lxc-start 1411768079.134 INFO     lxc_conf - Autodev not required.
      lxc-start 1411768079.137 DEBUG    lxc_conf - remounting /lib on /usr/lib64/lxc/rootfs/lib to respect bind or remount options
      lxc-start 1411768079.138 DEBUG    lxc_conf - (at remount) flags for /lib was 4096, required extra flags are 0
      lxc-start 1411768079.138 DEBUG    lxc_conf - mountflags already was 4097, skipping remount
      lxc-start 1411768079.138 DEBUG    lxc_conf - mounted '/lib' on '/usr/lib64/lxc/rootfs/lib', type 'none'
      lxc-start 1411768079.138 DEBUG    lxc_conf - remounting /usr/lib on /usr/lib64/lxc/rootfs/usr/lib to respect bind or remount options
      lxc-start 1411768079.138 DEBUG    lxc_conf - (at remount) flags for /usr/lib was 4096, required extra flags are 0
      lxc-start 1411768079.138 DEBUG    lxc_conf - mountflags already was 4097, skipping remount
      lxc-start 1411768079.138 DEBUG    lxc_conf - mounted '/usr/lib' on '/usr/lib64/lxc/rootfs/usr/lib', type 'none'
      lxc-start 1411768079.138 DEBUG    lxc_conf - remounting /lib64 on /usr/lib64/lxc/rootfs/lib64 to respect bind or remount options
      lxc-start 1411768079.138 DEBUG    lxc_conf - (at remount) flags for /lib64 was 4096, required extra flags are 0
      lxc-start 1411768079.138 DEBUG    lxc_conf - mountflags already was 4097, skipping remount
      lxc-start 1411768079.138 DEBUG    lxc_conf - mounted '/lib64' on '/usr/lib64/lxc/rootfs/lib64', type 'none'
      lxc-start 1411768079.138 DEBUG    lxc_conf - remounting /usr/lib64 on /usr/lib64/lxc/rootfs/usr/lib64 to respect bind or remount options
      lxc-start 1411768079.138 DEBUG    lxc_conf - (at remount) flags for /usr/lib64 was 4096, required extra flags are 0
      lxc-start 1411768079.138 DEBUG    lxc_conf - mountflags already was 4097, skipping remount
      lxc-start 1411768079.138 DEBUG    lxc_conf - mounted '/usr/lib64' on '/usr/lib64/lxc/rootfs/usr/lib64', type 'none'
      lxc-start 1411768079.139 DEBUG    lxc_conf - remounting /sys/kernel/security on /usr/lib64/lxc/rootfs/sys/kernel/security to respect bind or remount options
      lxc-start 1411768079.139 DEBUG    lxc_conf - (at remount) flags for /sys/kernel/security was 4096, required extra flags are 0
      lxc-start 1411768079.139 DEBUG    lxc_conf - mountflags already was 4097, skipping remount
      lxc-start 1411768079.139 DEBUG    lxc_conf - mounted '/sys/kernel/security' on '/usr/lib64/lxc/rootfs/sys/kernel/security', type 'none'
      lxc-start 1411768079.139 INFO     lxc_conf - mount points have been setup
      lxc-start 1411768079.139 INFO     lxc_conf - console has been setup
      lxc-start 1411768079.147 INFO     lxc_conf - 1 tty(s) has been setup
      lxc-start 1411768079.147 INFO     lxc_conf - I am 1, /proc/self points to '1'
      lxc-start 1411768079.147 DEBUG    lxc_conf - pivot_root syscall to '/usr/lib64/lxc/rootfs' successful
      lxc-start 1411768079.147 ERROR    lxc_conf - Invalid argument - failed to mount a new instance of '/dev/pts'
      lxc-start 1411768079.155 ERROR    lxc_conf - failed to setup the new pts instance
      lxc-start 1411768079.157 ERROR    lxc_start - failed to setup the container
      lxc-start 1411768079.157 ERROR    lxc_sync - invalid sequence number 1. expected 2
      lxc-start 1411768079.158 WARN     lxc_conf - failed to remove interface '(null)'
      lxc-start 1411768079.159 ERROR    lxc_start - failed to spawn 'bb01'
      lxc-start 1411768079.161 ERROR    lxc_start_ui - The container failed to start.
      lxc-start 1411768079.162 ERROR    lxc_start_ui - Additional information can be obtained by setting the --logfile and --logpriority options.


> ---
>  doc/lxc.container.conf.sgml.in |  14 ---
>  src/lxc/conf.c                 | 211 ++++++++---------------------------------
>  src/lxc/confile.c              |   1 +
>  3 files changed, 40 insertions(+), 186 deletions(-)
> 
> diff --git a/doc/lxc.container.conf.sgml.in
> b/doc/lxc.container.conf.sgml.in index 121f882..8dbab5f 100644
> --- a/doc/lxc.container.conf.sgml.in
> +++ b/doc/lxc.container.conf.sgml.in
> @@ -943,20 +943,6 @@ proc proc proc nodev,noexec,nosuid 0 0
>  	  </listitem>
>  	</varlistentry>
>  
> -	<varlistentry>
> -	  <term>
> -	    <option>lxc.pivotdir</option>
> -	  </term>
> -	  <listitem>
> -	    <para>
> -	      where to pivot the original root file system under
> -	      <option>lxc.rootfs</option>, specified relatively to
> -	      that.  The default is <filename>mnt</filename>.
> -	      It is created if necessary, and also removed after
> -	      unmounting everything from it during container setup.
> -	    </para>
> -	  </listitem>
> -	</varlistentry>
>        </variablelist>
>      </refsect2>
>  
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index e61002b..31673d5 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -1025,199 +1025,66 @@ static int setup_tty(const struct lxc_rootfs
> *rootfs, return 0;
>  }
>  
> -static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
> -{
> -	struct lxc_list	*mountlist, *listentry, *iterator;
> -	char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
> -	int found;
> -	void **cbparm;
> -
> -	mountentry = buffer;
> -	cbparm = (void **)data;
> -
> -	mountlist = cbparm[0];
> -	pivotdir  = cbparm[1];
> -
> -	/* parse entry, first field is mountname, ignore */
> -	mountpoint = strtok_r(mountentry, " ", &saveptr);
> -	if (!mountpoint)
> -		return -1;
> -
> -	/* second field is mountpoint */
> -	mountpoint = strtok_r(NULL, " ", &saveptr);
> -	if (!mountpoint)
> -		return -1;
> -
> -	/* only consider mountpoints below old root fs */
> -	if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
> -		return 0;
> -
> -	/* filter duplicate mountpoints */
> -	found = 0;
> -	lxc_list_for_each(iterator, mountlist) {
> -		if (!strcmp(iterator->elem, mountpoint)) {
> -			found = 1;
> -			break;
> -		}
> -	}
> -	if (found)
> -		return 0;
> -
> -	/* add entry to list */
> -	listentry = malloc(sizeof(*listentry));
> -	if (!listentry) {
> -		SYSERROR("malloc for mountpoint listentry failed");
> -		return -1;
> -	}
>  
> -	listentry->elem = strdup(mountpoint);
> -	if (!listentry->elem) {
> -		SYSERROR("strdup failed");
> -		free(listentry);
> -		return -1;
> -	}
> -	lxc_list_add_tail(mountlist, listentry);
> -
> -	return 0;
> -}
> -
> -static int umount_oldrootfs(const char *oldrootfs)
> +static int setup_rootfs_pivot_root(const char *rootfs, const char
> *pivotdir) {
> -	char path[MAXPATHLEN];
> -	void *cbparm[2];
> -	struct lxc_list mountlist, *iterator, *next;
> -	int ok, still_mounted, last_still_mounted;
> -	int rc;
> +	int oldroot = -1, newroot = -1;
>  
> -	/* read and parse /proc/mounts in old root fs */
> -	lxc_list_init(&mountlist);
> -
> -	/* oldrootfs is on the top tree directory now */
> -	rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
> -	if (rc >= sizeof(path)) {
> -		ERROR("rootfs name too long");
> +	oldroot = open("/", O_DIRECTORY | O_RDONLY);
> +	if (oldroot < 0) {
> +		SYSERROR("Error opening old-/ for fchdir");
>  		return -1;
>  	}
> -	cbparm[0] = &mountlist;
> -
> -	cbparm[1] = strdup(path);
> -	if (!cbparm[1]) {
> -		SYSERROR("strdup failed");
> -		return -1;
> +	newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
> +	if (newroot < 0) {
> +		SYSERROR("Error opening new-/ for fchdir");
> +		goto fail;
>  	}
>  
> -	rc = snprintf(path, sizeof(path), "%s/proc/mounts",
> oldrootfs);
> -	if (rc >= sizeof(path)) {
> -		ERROR("container proc/mounts name too long");
> -		return -1;
> -	}
> -
> -	ok = lxc_file_for_each_line(path,
> -				    setup_rootfs_pivot_root_cb,
> &cbparm);
> -	if (ok < 0) {
> -		SYSERROR("failed to read or parse mount list '%s'",
> path);
> -		return -1;
> -	}
> -
> -	/* umount filesystems until none left or list no longer
> shrinks */
> -	still_mounted = 0;
> -	do {
> -		last_still_mounted = still_mounted;
> -		still_mounted = 0;
> -
> -		lxc_list_for_each_safe(iterator, &mountlist, next) {
> -
> -			/* umount normally */
> -			if (!umount(iterator->elem)) {
> -				DEBUG("umounted '%s'", (char
> *)iterator->elem);
> -				lxc_list_del(iterator);
> -				continue;
> -			}
> -
> -			still_mounted++;
> -		}
> -
> -	} while (still_mounted > 0 && still_mounted !=
> last_still_mounted); -
> -
> -	lxc_list_for_each(iterator, &mountlist) {
> -
> -		/* let's try a lazy umount */
> -		if (!umount2(iterator->elem, MNT_DETACH)) {
> -			INFO("lazy unmount of '%s'", (char
> *)iterator->elem);
> -			continue;
> -		}
> -
> -		/* be more brutal (nfs) */
> -		if (!umount2(iterator->elem, MNT_FORCE)) {
> -			INFO("forced unmount of '%s'", (char
> *)iterator->elem);
> -			continue;
> -		}
> -
> -		WARN("failed to unmount '%s'", (char
> *)iterator->elem);
> -	}
> -
> -	return 0;
> -}
> -
> -static int setup_rootfs_pivot_root(const char *rootfs, const char
> *pivotdir) -{
> -	char path[MAXPATHLEN];
> -	int remove_pivotdir = 0;
> -	int rc;
> -
>  	/* change into new root fs */
> -	if (chdir(rootfs)) {
> +	if (fchdir(newroot)) {
>  		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
> -		return -1;
> -	}
> -
> -	if (!pivotdir)
> -		pivotdir = "lxc_putold";
> -
> -	/* compute the full path to pivotdir under rootfs */
> -	rc = snprintf(path, sizeof(path), "%s/%s", rootfs, pivotdir);
> -	if (rc >= sizeof(path)) {
> -		ERROR("pivot dir name too long");
> -		return -1;
> +		goto fail;
>  	}
>  
> -	if (access(path, F_OK)) {
> -
> -		if (mkdir_p(path, 0755) < 0) {
> -			SYSERROR("failed to create pivotdir '%s'",
> path);
> -			return -1;
> -		}
> -
> -		remove_pivotdir = 1;
> -		DEBUG("created '%s' directory", path);
> -	}
> -
> -	DEBUG("mountpoint for old rootfs is '%s'", path);
> -
>  	/* pivot_root into our new root fs */
> -	if (pivot_root(".", path)) {
> +	if (pivot_root(".", ".")) {
>  		SYSERROR("pivot_root syscall failed");
> -		return -1;
> +		goto fail;
>  	}
>  
> -	if (chdir("/")) {
> -		SYSERROR("can't chdir to / after pivot_root");
> -		return -1;
> +	/*
> +	 * at this point the old-root is mounted on top of our
> new-root
> +	 * To unmounted it we must not be chdir'd into it, so escape
> back
> +	 * to old-root
> +	 */
> +	if (fchdir(oldroot) < 0) {
> +		SYSERROR("Error entering oldroot");
> +		goto fail;
> +	}
> +	if (umount2("/", MNT_DETACH) < 0) {
> +		SYSERROR("Error detaching old root");
> +		goto fail;
>  	}
>  
> -	DEBUG("pivot_root syscall to '%s' successful", rootfs);
> +	if (fchdir(newroot) < 0) {
> +		SYSERROR("Error re-entering newroot");
> +		goto fail;
> +	}
>  
> -	/* we switch from absolute path to relative path */
> -	if (umount_oldrootfs(pivotdir))
> -		return -1;
> +	close(oldroot);
> +	close(newroot);
>  
> -	/* remove temporary mount point, we don't consider the
> removing
> -	 * as fatal */
> -	if (remove_pivotdir && rmdir(pivotdir))
> -		WARN("can't remove mountpoint '%s': %m", pivotdir);
> +	DEBUG("pivot_root syscall to '%s' successful", rootfs);
>  
>  	return 0;
> +
> +fail:
> +	if (oldroot != -1)
> +		close(oldroot);
> +	if (newroot != -1)
> +		close(newroot);
> +	return -1;
>  }
>  
>  /*
> diff --git a/src/lxc/confile.c b/src/lxc/confile.c
> index 9b1fba8..1475ac1 100644
> --- a/src/lxc/confile.c
> +++ b/src/lxc/confile.c
> @@ -1662,6 +1662,7 @@ static int config_rootfs_options(const char
> *key, const char *value, static int config_pivotdir(const char *key,
> const char *value, struct lxc_conf *lxc_conf)
>  {
> +	WARN("lxc.pivotdir is ignored.  It will soon become an
> error."); return config_path_item(&lxc_conf->rootfs.pivot, value);
>  }
>  



More information about the lxc-devel mailing list