[lxc-devel] [lxd/master] Add shiftfs

stgraber on Github lxc-bot at linuxcontainers.org
Fri Mar 29 21:16:11 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190329/bce3d4e8/attachment.bin>
-------------- next part --------------
From c5378b802b7fce4fbf28bf6d6970ffe0b35c753f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Thu, 28 Mar 2019 15:36:30 -0400
Subject: [PATCH 01/10] lxd/containers: Cleanup shifting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 58 ++++++++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 98e08d4c3a..610a62f9d8 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1976,28 +1976,17 @@ func (c *containerLXC) startCommon() (string, error) {
 	}
 
 	/* Deal with idmap changes */
-	idmap, err := c.NextIdmap()
+	nextIdmap, err := c.NextIdmap()
 	if err != nil {
 		return "", errors.Wrap(err, "Set ID map")
 	}
 
-	lastIdmap, err := c.DiskIdmap()
+	diskIdmap, err := c.DiskIdmap()
 	if err != nil {
 		return "", errors.Wrap(err, "Set last ID map")
 	}
 
-	var jsonIdmap string
-	if idmap != nil {
-		idmapBytes, err := json.Marshal(idmap.Idmap)
-		if err != nil {
-			return "", err
-		}
-		jsonIdmap = string(idmapBytes)
-	} else {
-		jsonIdmap = "[]"
-	}
-
-	if !reflect.DeepEqual(idmap, lastIdmap) {
+	if !reflect.DeepEqual(nextIdmap, diskIdmap) {
 		if shared.IsTrue(c.expandedConfig["security.protection.shift"]) {
 			return "", fmt.Errorf("Container is protected against filesystem shifting")
 		}
@@ -2010,11 +1999,11 @@ func (c *containerLXC) startCommon() (string, error) {
 			return "", errors.Wrap(err, "Storage start")
 		}
 
-		if lastIdmap != nil {
+		if diskIdmap != nil {
 			if c.Storage().GetStorageType() == storageTypeZfs {
-				err = lastIdmap.UnshiftRootfs(c.RootfsPath(), zfsIdmapSetSkipper)
+				err = diskIdmap.UnshiftRootfs(c.RootfsPath(), zfsIdmapSetSkipper)
 			} else {
-				err = lastIdmap.UnshiftRootfs(c.RootfsPath(), nil)
+				err = diskIdmap.UnshiftRootfs(c.RootfsPath(), nil)
 			}
 			if err != nil {
 				if ourStart {
@@ -2024,11 +2013,11 @@ func (c *containerLXC) startCommon() (string, error) {
 			}
 		}
 
-		if idmap != nil {
+		if nextIdmap != nil {
 			if c.Storage().GetStorageType() == storageTypeZfs {
-				err = idmap.ShiftRootfs(c.RootfsPath(), zfsIdmapSetSkipper)
+				err = nextIdmap.ShiftRootfs(c.RootfsPath(), zfsIdmapSetSkipper)
 			} else {
-				err = idmap.ShiftRootfs(c.RootfsPath(), nil)
+				err = nextIdmap.ShiftRootfs(c.RootfsPath(), nil)
 			}
 			if err != nil {
 				if ourStart {
@@ -2038,7 +2027,16 @@ func (c *containerLXC) startCommon() (string, error) {
 			}
 		}
 
-		err = c.ConfigKeySet("volatile.last_state.idmap", jsonIdmap)
+		jsonDiskIdmap := "[]"
+		if nextIdmap != nil {
+			idmapBytes, err := json.Marshal(nextIdmap.Idmap)
+			if err != nil {
+				return "", err
+			}
+			jsonDiskIdmap = string(idmapBytes)
+		}
+
+		err = c.ConfigKeySet("volatile.last_state.idmap", jsonDiskIdmap)
 		if err != nil {
 			return "", errors.Wrapf(err, "Set volatile.last_state.idmap config key on container %q (id %d)", c.name, c.id)
 		}
@@ -2046,9 +2044,21 @@ func (c *containerLXC) startCommon() (string, error) {
 		c.updateProgress("")
 	}
 
-	err = c.ConfigKeySet("volatile.idmap.current", jsonIdmap)
-	if err != nil {
-		return "", errors.Wrapf(err, "Set volatile.idmap.current config key on container %q (id %d)", c.name, c.id)
+	var idmapBytes []byte
+	if nextIdmap == nil {
+		idmapBytes = []byte("[]")
+	} else {
+		idmapBytes, err = json.Marshal(nextIdmap.Idmap)
+		if err != nil {
+			return "", err
+		}
+	}
+
+	if c.localConfig["volatile.idmap.current"] != string(idmapBytes) {
+		err = c.ConfigKeySet("volatile.idmap.current", string(idmapBytes))
+		if err != nil {
+			return "", errors.Wrapf(err, "Set volatile.idmap.current config key on container %q (id %d)", c.name, c.id)
+		}
 	}
 
 	// Generate the Seccomp profile

From 44386f0a9a4de18612d01df1489bcead94b9e9c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Thu, 28 Mar 2019 15:38:15 -0400
Subject: [PATCH 02/10] lxd/migrate: Shift CRIU files to current map
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 610a62f9d8..467c720b39 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -5406,12 +5406,12 @@ func (c *containerLXC) Migrate(args *CriuMigrationArgs) error {
 		 * opened by the process after it is in its user
 		 * namespace.
 		 */
-		if !c.IsPrivileged() {
-			idmapset, err := c.CurrentIdmap()
-			if err != nil {
-				return err
-			}
+		idmapset, err := c.CurrentIdmap()
+		if err != nil {
+			return err
+		}
 
+		if idmapset != nil {
 			ourStart, err := c.StorageStart()
 			if err != nil {
 				return err

From 01ed59e4fc85864c0dbacc51a911d37639ae708d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Thu, 28 Mar 2019 15:38:29 -0400
Subject: [PATCH 03/10] lxd/containers: Cleanup template application
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 86 ++++++++++++++++++++++----------------------
 1 file changed, 42 insertions(+), 44 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 467c720b39..a5f136f7e2 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -5578,6 +5578,46 @@ func (c *containerLXC) templateApplyNow(trigger string) error {
 		return errors.Wrapf(err, "Could not parse %s", fname)
 	}
 
+	// Find rootUid and rootGid
+	idmapset, err := c.DiskIdmap()
+	if err != nil {
+		return errors.Wrap(err, "Failed to set ID map")
+	}
+
+	rootUid := int64(0)
+	rootGid := int64(0)
+
+	// Get the right uid and gid for the container
+	if idmapset != nil {
+		rootUid, rootGid = idmapset.ShiftIntoNs(0, 0)
+	}
+
+	// Figure out the container architecture
+	arch, err := osarch.ArchitectureName(c.architecture)
+	if err != nil {
+		arch, err = osarch.ArchitectureName(c.state.OS.Architectures[0])
+		if err != nil {
+			return errors.Wrap(err, "Failed to detect system architecture")
+		}
+	}
+
+	// Generate the container metadata
+	containerMeta := make(map[string]string)
+	containerMeta["name"] = c.name
+	containerMeta["architecture"] = arch
+
+	if c.ephemeral {
+		containerMeta["ephemeral"] = "true"
+	} else {
+		containerMeta["ephemeral"] = "false"
+	}
+
+	if c.IsPrivileged() {
+		containerMeta["privileged"] = "true"
+	} else {
+		containerMeta["privileged"] = "false"
+	}
+
 	// Go through the templates
 	for tplPath, tpl := range metadata.Templates {
 		var w *os.File
@@ -5608,22 +5648,8 @@ func (c *containerLXC) templateApplyNow(trigger string) error {
 				return errors.Wrap(err, "Failed to create template file")
 			}
 		} else {
-			// Create a new one
-			uid := int64(0)
-			gid := int64(0)
-
-			// Get the right uid and gid for the container
-			if !c.IsPrivileged() {
-				idmapset, err := c.DiskIdmap()
-				if err != nil {
-					return errors.Wrap(err, "Failed to set ID map")
-				}
-
-				uid, gid = idmapset.ShiftIntoNs(0, 0)
-			}
-
 			// Create the directories leading to the file
-			shared.MkdirAllOwner(path.Dir(fullpath), 0755, int(uid), int(gid))
+			shared.MkdirAllOwner(path.Dir(fullpath), 0755, int(rootUid), int(rootGid))
 
 			// Create the file itself
 			w, err = os.Create(fullpath)
@@ -5632,9 +5658,7 @@ func (c *containerLXC) templateApplyNow(trigger string) error {
 			}
 
 			// Fix ownership and mode
-			if !c.IsPrivileged() {
-				w.Chown(int(uid), int(gid))
-			}
+			w.Chown(int(rootUid), int(rootGid))
 			w.Chmod(0644)
 		}
 		defer w.Close()
@@ -5653,32 +5677,6 @@ func (c *containerLXC) templateApplyNow(trigger string) error {
 			return errors.Wrap(err, "Failed to render template")
 		}
 
-		// Figure out the architecture
-		arch, err := osarch.ArchitectureName(c.architecture)
-		if err != nil {
-			arch, err = osarch.ArchitectureName(c.state.OS.Architectures[0])
-			if err != nil {
-				return errors.Wrap(err, "Failed to detect system architecture")
-			}
-		}
-
-		// Generate the metadata
-		containerMeta := make(map[string]string)
-		containerMeta["name"] = c.name
-		containerMeta["architecture"] = arch
-
-		if c.ephemeral {
-			containerMeta["ephemeral"] = "true"
-		} else {
-			containerMeta["ephemeral"] = "false"
-		}
-
-		if c.IsPrivileged() {
-			containerMeta["privileged"] = "true"
-		} else {
-			containerMeta["privileged"] = "false"
-		}
-
 		configGet := func(confKey, confDefault *pongo2.Value) *pongo2.Value {
 			val, ok := c.expandedConfig[confKey.String()]
 			if !ok {

From 4bc57847fc87bf5a09c2bf79e7fa873c0744d70a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Thu, 28 Mar 2019 15:54:37 -0400
Subject: [PATCH 04/10] lxd/containers: Properly handle tar shifting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index a5f136f7e2..bc9e35f6d1 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -6380,17 +6380,19 @@ func (c *containerLXC) tarStoreFile(linkmap map[uint64]string, offset int, tw *t
 	}
 
 	// Unshift the id under /rootfs/ for unpriv containers
-	if !c.IsPrivileged() && strings.HasPrefix(hdr.Name, "/rootfs") {
+	if strings.HasPrefix(hdr.Name, "/rootfs") {
 		idmapset, err := c.DiskIdmap()
 		if err != nil {
 			return err
 		}
 
-		huid, hgid := idmapset.ShiftFromNs(int64(hdr.Uid), int64(hdr.Gid))
-		hdr.Uid = int(huid)
-		hdr.Gid = int(hgid)
-		if hdr.Uid == -1 || hdr.Gid == -1 {
-			return nil
+		if idmapset != nil {
+			huid, hgid := idmapset.ShiftFromNs(int64(hdr.Uid), int64(hdr.Gid))
+			hdr.Uid = int(huid)
+			hdr.Gid = int(hgid)
+			if hdr.Uid == -1 || hdr.Gid == -1 {
+				return nil
+			}
 		}
 	}
 

From 0dce3bdfc6aae6f31f0c470cb65c049b82fb01f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Thu, 28 Mar 2019 15:56:50 -0400
Subject: [PATCH 05/10] lxd/containers: Handle mid-remap containers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index bc9e35f6d1..2df909738b 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1854,7 +1854,7 @@ func (c *containerLXC) expandDevices(profiles []api.Profile) error {
 // setupUnixDevice() creates the unix device and sets up the necessary low-level
 // liblxc configuration items.
 func (c *containerLXC) setupUnixDevice(prefix string, dev types.Device, major int, minor int, path string, createMustSucceed bool, defaultMode bool) error {
-	if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+	if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
 		err := lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("c %d:%d rwm", major, minor))
 		if err != nil {
 			return err
@@ -2102,7 +2102,7 @@ func (c *containerLXC) startCommon() (string, error) {
 				continue
 			}
 			devPath := paths[0]
-			if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+			if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
 				// Add the new device cgroup rule
 				dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
 				if err != nil {
@@ -2407,7 +2407,7 @@ func (c *containerLXC) startCommon() (string, error) {
 
 	// Set right permission to allow traversal
 	var mode os.FileMode
-	if c.IsPrivileged() {
+	if c.isCurrentlyPrivileged() {
 		mode = 0700
 	} else {
 		mode = 0711
@@ -6813,7 +6813,7 @@ func (c *containerLXC) insertUnixDevice(prefix string, m types.Device, defaultMo
 		}
 	}
 
-	if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+	if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
 		// Add the new device cgroup rule
 		if err := c.CGroupSet("devices.allow", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor)); err != nil {
 			return fmt.Errorf("Failed to add cgroup rule for device")
@@ -6885,7 +6885,7 @@ func (c *containerLXC) removeUnixDevice(prefix string, m types.Device, eject boo
 		}
 	}
 
-	if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+	if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
 		// Remove the device cgroup rule
 		err = c.CGroupSet("devices.deny", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor))
 		if err != nil {
@@ -7007,7 +7007,7 @@ func (c *containerLXC) addInfinibandDevicesPerPort(deviceName string, ifDev *IBF
 			return err
 		}
 
-		if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+		if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
 			// Add the new device cgroup rule
 			dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
 			if err != nil {
@@ -7055,7 +7055,7 @@ func (c *containerLXC) addInfinibandDevicesPerFun(deviceName string, ifDev *IBF,
 			return err
 		}
 		devPath := paths[0]
-		if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+		if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
 			// Add the new device cgroup rule
 			dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
 			if err != nil {
@@ -8651,6 +8651,19 @@ func (c *containerLXC) IsNesting() bool {
 	return shared.IsTrue(c.expandedConfig["security.nesting"])
 }
 
+// isCurrentlyPrivileged reports whether the container is privileged right now: when it is running, that means
+// CurrentIdmap() is nil; when it is stopped, or the idmap lookup fails, the IsPrivileged() config value is used.
+func (c *containerLXC) isCurrentlyPrivileged() bool {
+	if !c.IsRunning() {
+		return c.IsPrivileged()
+	}
+	idmap, err := c.CurrentIdmap()
+	if err != nil {
+		return c.IsPrivileged()
+	}
+	return idmap == nil
+}
+
 func (c *containerLXC) IsPrivileged() bool {
 	return shared.IsTrue(c.expandedConfig["security.privileged"])
 }

From 0ee04324bc799c2ec69499e49a542c1601c497b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Fri, 29 Mar 2019 15:56:22 -0400
Subject: [PATCH 06/10] lxd/containers: Stop proxy before storage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 2df909738b..c4cff68a3b 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2895,8 +2895,14 @@ func (c *containerLXC) OnStop(target string) error {
 	// Make sure we can't call go-lxc functions by mistake
 	c.fromHook = true
 
+	// Kill all proxy devices, must happen before StorageStop
+	err := c.removeProxyDevices()
+	if err != nil {
+		return fmt.Errorf("Unable to remove proxy devices: %v", err)
+	}
+
 	// Stop the storage for this container
-	_, err := c.StorageStop()
+	_, err = c.StorageStop()
 	if err != nil {
 		if op != nil {
 			op.Done(err)
@@ -2961,12 +2967,6 @@ func (c *containerLXC) OnStop(target string) error {
 			logger.Error("Unable to remove network filters", log.Ctx{"container": c.Name(), "err": err})
 		}
 
-		// Clean all proxy devices
-		err = c.removeProxyDevices()
-		if err != nil {
-			logger.Error("Unable to remove proxy devices", log.Ctx{"container": c.Name(), "err": err})
-		}
-
 		// Reboot the container
 		if target == "reboot" {
 			// Start the container again

From cdae442545a808b2e91ba838e6cb5fcb8301dcd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Fri, 29 Mar 2019 15:38:11 -0400
Subject: [PATCH 07/10] lxd/storage/zfs: Run rename in clean mntns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/main.go              |   4 ++
 lxd/main_forkzfs.go      | 109 +++++++++++++++++++++++++++++++++++++++
 lxd/storage_zfs.go       |  10 ++--
 lxd/storage_zfs_utils.go |  27 +++++++---
 4 files changed, 137 insertions(+), 13 deletions(-)
 create mode 100644 lxd/main_forkzfs.go

diff --git a/lxd/main.go b/lxd/main.go
index 49b0f6520b..79d94fbe0e 100644
--- a/lxd/main.go
+++ b/lxd/main.go
@@ -128,6 +128,10 @@ func main() {
 	forkueventCmd := cmdForkuevent{global: &globalCmd}
 	app.AddCommand(forkueventCmd.Command())
 
+	// forkzfs sub-command
+	forkzfsCmd := cmdForkZFS{global: &globalCmd}
+	app.AddCommand(forkzfsCmd.Command())
+
 	// import sub-command
 	importCmd := cmdImport{global: &globalCmd}
 	app.AddCommand(importCmd.Command())
diff --git a/lxd/main_forkzfs.go b/lxd/main_forkzfs.go
new file mode 100644
index 0000000000..14cae00788
--- /dev/null
+++ b/lxd/main_forkzfs.go
@@ -0,0 +1,109 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"syscall"
+
+	"github.com/spf13/cobra"
+
+	"github.com/lxc/lxd/shared"
+)
+
+type cmdForkZFS struct {
+	global *cmdGlobal // set to &globalCmd when the sub-command is registered in main()
+}
+
+// Command returns the hidden "forkzfs" cobra sub-command, with c.Run as its RunE handler.
+func (c *cmdForkZFS) Command() *cobra.Command {
+	// Main subcommand
+	cmd := &cobra.Command{}
+	cmd.Use = "forkzfs [<arguments>...]"
+	cmd.Short = "Run ZFS inside a cleaned up mount namespace"
+	cmd.Long = `Description:
+  Run ZFS inside a cleaned up mount namespace
+
+  This internal command is used to run ZFS in some specific cases.
+`
+	cmd.RunE = c.Run
+	cmd.Hidden = true
+	return cmd
+}
+
+// Run unshares a private mount namespace, lazily detaches every mount whose
+// mount point starts with the resolved LXD directory, then runs "zfs" with
+// args wired to this process' stdin/stdout/stderr and returns its error.
+func (c *cmdForkZFS) Run(cmd *cobra.Command, args []string) error {
+	// Sanity checks: nothing to run without arguments, just show the help
+	if len(args) < 1 {
+		cmd.Help()
+		return nil
+	}
+
+	// Only root should run this
+	if os.Geteuid() != 0 {
+		return fmt.Errorf("This must be run as root")
+	}
+
+	// Unshare a clean mount namespace
+	// NOTE(review): unshare is per OS thread; consider runtime.LockOSThread()
+	err := syscall.Unshare(syscall.CLONE_NEWNS)
+	if err != nil {
+		return err
+	}
+
+	// Mark mount tree as private
+	err = syscall.Mount("none", "/", "", syscall.MS_REC|syscall.MS_PRIVATE, "")
+	if err != nil {
+		return err
+	}
+
+	// Expand the mount path
+	absPath, err := filepath.Abs(shared.VarPath())
+	if err != nil {
+		return err
+	}
+
+	// Fall back to the unresolved path when symlinks can't be evaluated
+	expPath, err := filepath.EvalSymlinks(absPath)
+	if err != nil {
+		expPath = absPath
+	}
+
+	// Read the mount table
+	file, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	// Unmount all mounts under LXD directory
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		// The mount point is the fifth field, skip lines too short to have one
+		rows := strings.Fields(scanner.Text())
+		if len(rows) < 5 || !strings.HasPrefix(rows[4], expPath) {
+			continue
+		}
+
+		// Best effort, errors are ignored
+		syscall.Unmount(rows[4], syscall.MNT_DETACH)
+	}
+
+	// Run the ZFS command
+	command := exec.Command("zfs", args...)
+	command.Stdin = os.Stdin
+	command.Stdout = os.Stdout
+	command.Stderr = os.Stderr
+
+	err = command.Run()
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/lxd/storage_zfs.go b/lxd/storage_zfs.go
index 0a01d16c74..197e17a9dd 100644
--- a/lxd/storage_zfs.go
+++ b/lxd/storage_zfs.go
@@ -507,7 +507,7 @@ func (s *storageZfs) StoragePoolVolumeDelete() error {
 				return err
 			}
 
-			err = zfsPoolVolumeRename(poolName, fs, fmt.Sprintf("deleted/custom/%s", uuid.NewRandom().String()))
+			err = zfsPoolVolumeRename(poolName, fs, fmt.Sprintf("deleted/custom/%s", uuid.NewRandom().String()), true)
 			if err != nil {
 				return err
 			}
@@ -761,7 +761,7 @@ func (s *storageZfs) StoragePoolVolumeRename(newName string) error {
 		newPath = fmt.Sprintf("custom/%s", newName)
 	}
 	poolName := s.getOnDiskPoolName()
-	err = zfsPoolVolumeRename(poolName, oldPath, newPath)
+	err = zfsPoolVolumeRename(poolName, oldPath, newPath, false)
 	if err != nil {
 		return err
 	}
@@ -1446,7 +1446,7 @@ func (s *storageZfs) ContainerRename(container container, newName string) error
 	// Rename the dataset.
 	oldZfsDataset := fmt.Sprintf("containers/%s", oldName)
 	newZfsDataset := fmt.Sprintf("containers/%s", newName)
-	err = zfsPoolVolumeRename(poolName, oldZfsDataset, newZfsDataset)
+	err = zfsPoolVolumeRename(poolName, oldZfsDataset, newZfsDataset, false)
 	if err != nil {
 		return err
 	}
@@ -2363,7 +2363,7 @@ func (s *storageZfs) ImageCreate(fingerprint string, tracker *ioprogress.Progres
 	}()
 
 	if zfsFilesystemEntityExists(poolName, fmt.Sprintf("deleted/%s", fs)) {
-		if err := zfsPoolVolumeRename(poolName, fmt.Sprintf("deleted/%s", fs), fs); err != nil {
+		if err := zfsPoolVolumeRename(poolName, fmt.Sprintf("deleted/%s", fs), fs, true); err != nil {
 			return err
 		}
 
@@ -2492,7 +2492,7 @@ func (s *storageZfs) ImageDelete(fingerprint string) error {
 				return err
 			}
 
-			if err := zfsPoolVolumeRename(poolName, fs, fmt.Sprintf("deleted/%s", fs)); err != nil {
+			if err := zfsPoolVolumeRename(poolName, fs, fmt.Sprintf("deleted/%s", fs), true); err != nil {
 				return err
 			}
 		}
diff --git a/lxd/storage_zfs_utils.go b/lxd/storage_zfs_utils.go
index 4ce07da45f..a5f0eaaef6 100644
--- a/lxd/storage_zfs_utils.go
+++ b/lxd/storage_zfs_utils.go
@@ -358,17 +358,28 @@ func zfsFilesystemEntityPropertyGet(pool string, path string, key string) (strin
 	return strings.TrimRight(output, "\n"), nil
 }
 
-func zfsPoolVolumeRename(pool string, source string, dest string) error {
+func zfsPoolVolumeRename(pool string, source string, dest string, ignoreMounts bool) error {
 	var err error
 	var output string
 
 	for i := 0; i < 20; i++ {
-		output, err = shared.RunCommand(
-			"zfs",
-			"rename",
-			"-p",
-			fmt.Sprintf("%s/%s", pool, source),
-			fmt.Sprintf("%s/%s", pool, dest))
+		if ignoreMounts {
+			output, err = shared.RunCommand(
+				"/proc/self/exe",
+				"forkzfs",
+				"--",
+				"rename",
+				"-p",
+				fmt.Sprintf("%s/%s", pool, source),
+				fmt.Sprintf("%s/%s", pool, dest))
+		} else {
+			output, err = shared.RunCommand(
+				"zfs",
+				"rename",
+				"-p",
+				fmt.Sprintf("%s/%s", pool, source),
+				fmt.Sprintf("%s/%s", pool, dest))
+		}
 
 		// Success
 		if err == nil {
@@ -738,7 +749,7 @@ func (s *storageZfs) doContainerDelete(project, name string) error {
 				return err
 			}
 
-			err = zfsPoolVolumeRename(poolName, fs, fmt.Sprintf("deleted/containers/%s", uuid.NewRandom().String()))
+			err = zfsPoolVolumeRename(poolName, fs, fmt.Sprintf("deleted/containers/%s", uuid.NewRandom().String()), true)
 			if err != nil {
 				return err
 			}

From c2d69e3762bedd76deadc46507a86193c96c7a02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Fri, 29 Mar 2019 17:13:39 -0400
Subject: [PATCH 08/10] lxd/containers: Add shiftfs support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index c4cff68a3b..a25923748e 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1159,6 +1159,31 @@ func (c *containerLXC) initLXC(config bool) error {
 		return err
 	}
 
+	diskIdmap, err := c.DiskIdmap()
+	if err != nil {
+		return err
+	}
+
+	if c.state.OS.Shiftfs && !c.IsPrivileged() && diskIdmap == nil {
+		// Host side mark mount
+		err = lxcSetConfigItem(cc, "lxc.hook.pre-start", fmt.Sprintf("/bin/mount -t shiftfs -o mark,passthrough=3 %s %s", c.RootfsPath(), c.RootfsPath()))
+		if err != nil {
+			return err
+		}
+
+		// Container side shift mount
+		err = lxcSetConfigItem(cc, "lxc.hook.pre-mount", fmt.Sprintf("/bin/mount -t shiftfs -o passthrough=3 %s %s", c.RootfsPath(), c.RootfsPath()))
+		if err != nil {
+			return err
+		}
+
+		// Host side umount of mark mount
+		err = lxcSetConfigItem(cc, "lxc.hook.start-host", fmt.Sprintf("/bin/umount -l %s", c.RootfsPath()))
+		if err != nil {
+			return err
+		}
+	}
+
 	err = lxcSetConfigItem(cc, "lxc.hook.post-stop", fmt.Sprintf("%s callhook %s %d stop", c.state.OS.ExecPath, shared.VarPath(""), c.id))
 	if err != nil {
 		return err
@@ -1986,7 +2011,7 @@ func (c *containerLXC) startCommon() (string, error) {
 		return "", errors.Wrap(err, "Set last ID map")
 	}
 
-	if !reflect.DeepEqual(nextIdmap, diskIdmap) {
+	if !reflect.DeepEqual(nextIdmap, diskIdmap) && !(diskIdmap == nil && c.state.OS.Shiftfs) {
 		if shared.IsTrue(c.expandedConfig["security.protection.shift"]) {
 			return "", fmt.Errorf("Container is protected against filesystem shifting")
 		}
@@ -2013,7 +2038,7 @@ func (c *containerLXC) startCommon() (string, error) {
 			}
 		}
 
-		if nextIdmap != nil {
+		if nextIdmap != nil && !c.state.OS.Shiftfs {
 			if c.Storage().GetStorageType() == storageTypeZfs {
 				err = nextIdmap.ShiftRootfs(c.RootfsPath(), zfsIdmapSetSkipper)
 			} else {
@@ -2028,7 +2053,7 @@ func (c *containerLXC) startCommon() (string, error) {
 		}
 
 		jsonDiskIdmap := "[]"
-		if nextIdmap != nil {
+		if nextIdmap != nil && !c.state.OS.Shiftfs {
 			idmapBytes, err := json.Marshal(nextIdmap.Idmap)
 			if err != nil {
 				return "", err
@@ -2445,6 +2470,9 @@ func (c *containerLXC) startCommon() (string, error) {
 		return "", fmt.Errorf("Error updating last used: %v", err)
 	}
 
+	// Unmount any previously mounted shiftfs
+	syscall.Unmount(c.RootfsPath(), syscall.MNT_DETACH)
+
 	return configPath, nil
 }
 

From cd02ec2f6caddba3e0320153f8488cef68961417 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Thu, 28 Mar 2019 17:02:25 -0400
Subject: [PATCH 09/10] tests: Make proxy tests work with shiftfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 test/suites/proxy.sh | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/test/suites/proxy.sh b/test/suites/proxy.sh
index 9e5bee3688..ac38f17b23 100644
--- a/test/suites/proxy.sh
+++ b/test/suites/proxy.sh
@@ -160,9 +160,10 @@ test_proxy_device_unix() {
   # Initial test
   lxc config device add proxyTester proxyDev proxy "listen=unix:${HOST_SOCK}" connect=unix:/tmp/"lxdtest-$(basename "${LXD_DIR}").sock" bind=host
   (
-    cd "${LXD_DIR}/containers/proxyTester/rootfs/tmp/" || exit
+    PID="$(lxc query /1.0/containers/proxyTester/state | jq .pid)"
+    cd "/proc/${PID}/root/tmp/" || exit
     umask 0000
-    exec nsenter -n -U -t "$(lxc query /1.0/containers/proxyTester/state | jq .pid)" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}").sock",unlink-early exec:/bin/cat
+    exec nsenter -n -U -t "${PID}" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}").sock",unlink-early exec:/bin/cat
   ) &
   NSENTER_PID=$!
   sleep 0.5
@@ -181,9 +182,10 @@ test_proxy_device_unix() {
   # Restart the container
   lxc restart -f proxyTester
   (
-    cd "${LXD_DIR}/containers/proxyTester/rootfs/tmp/" || exit
+    PID="$(lxc query /1.0/containers/proxyTester/state | jq .pid)"
+    cd "/proc/${PID}/root/tmp/" || exit
     umask 0000
-    exec nsenter -n -U -t "$(lxc query /1.0/containers/proxyTester/state | jq .pid)" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}").sock",unlink-early exec:/bin/cat
+    exec nsenter -n -U -t "${PID}" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}").sock",unlink-early exec:/bin/cat
   ) &
   NSENTER_PID=$!
   sleep 0.5
@@ -202,9 +204,10 @@ test_proxy_device_unix() {
   # Change the socket
   lxc config device set proxyTester proxyDev connect unix:/tmp/"lxdtest-$(basename "${LXD_DIR}")-2.sock"
   (
-    cd "${LXD_DIR}/containers/proxyTester/rootfs/tmp/" || exit
+    PID="$(lxc query /1.0/containers/proxyTester/state | jq .pid)"
+    cd "/proc/${PID}/root/tmp/" || exit
     umask 0000
-    exec nsenter -n -U -t "$(lxc query /1.0/containers/proxyTester/state | jq .pid)" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}")-2.sock",unlink-early exec:/bin/cat
+    exec nsenter -n -U -t "${PID}" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}")-2.sock",unlink-early exec:/bin/cat
   ) &
   NSENTER_PID=$!
   sleep 0.5
@@ -237,9 +240,10 @@ test_proxy_device_tcp_unix() {
   # Initial test
   lxc config device add proxyTester proxyDev proxy "listen=tcp:127.0.0.1:${HOST_TCP_PORT}" connect=unix:/tmp/"lxdtest-$(basename "${LXD_DIR}").sock" bind=host
   (
-    cd "${LXD_DIR}/containers/proxyTester/rootfs/tmp/" || exit
+    PID="$(lxc query /1.0/containers/proxyTester/state | jq .pid)"
+    cd "/proc/${PID}/root/tmp/" || exit
     umask 0000
-    exec nsenter -n -U -t "$(lxc query /1.0/containers/proxyTester/state | jq .pid)" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}").sock",unlink-early exec:/bin/cat
+    exec nsenter -n -U -t "${PID}" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}").sock",unlink-early exec:/bin/cat
   ) &
   NSENTER_PID=$!
   sleep 0.5
@@ -256,9 +260,10 @@ test_proxy_device_tcp_unix() {
   # Restart the container
   lxc restart -f proxyTester
   (
-    cd "${LXD_DIR}/containers/proxyTester/rootfs/tmp/" || exit
+    PID="$(lxc query /1.0/containers/proxyTester/state | jq .pid)"
+    cd "/proc/${PID}/root/tmp/" || exit
     umask 0000
-    exec nsenter -n -U -t "$(lxc query /1.0/containers/proxyTester/state | jq .pid)" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}").sock",unlink-early exec:/bin/cat
+    exec nsenter -n -U -t "${PID}" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}").sock",unlink-early exec:/bin/cat
   ) &
   NSENTER_PID=$!
   sleep 0.5
@@ -275,9 +280,10 @@ test_proxy_device_tcp_unix() {
   # Change the socket
   lxc config device set proxyTester proxyDev connect unix:/tmp/"lxdtest-$(basename "${LXD_DIR}")-2.sock"
   (
-    cd "${LXD_DIR}/containers/proxyTester/rootfs/tmp/" || exit
+    PID="$(lxc query /1.0/containers/proxyTester/state | jq .pid)"
+    cd "/proc/${PID}/root/tmp/" || exit
     umask 0000
-    exec nsenter -n -U -t "$(lxc query /1.0/containers/proxyTester/state | jq .pid)" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}")-2.sock",unlink-early exec:/bin/cat
+    exec nsenter -n -U -t "${PID}" -- socat unix-listen:"lxdtest-$(basename "${LXD_DIR}")-2.sock",unlink-early exec:/bin/cat
   ) &
   NSENTER_PID=$!
   sleep 0.5

From bd48d0d73a5b2a77074e3875042999105da6d292 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Fri, 29 Mar 2019 17:13:24 -0400
Subject: [PATCH 10/10] tests: Make security tests work with shiftfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 test/suites/security.sh | 44 +++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/test/suites/security.sh b/test/suites/security.sh
index e95ab1d1c6..a89c846d2d 100644
--- a/test/suites/security.sh
+++ b/test/suites/security.sh
@@ -130,25 +130,27 @@ test_security_protection() {
   lxc profile unset default security.protection.delete
 
   # Test shifting protection
-  lxc init testimage c1
-  lxc start c1
-  lxc stop c1 --force
-
-  lxc profile set default security.protection.shift true
-  lxc start c1
-  lxc stop c1 --force
-
-  ! lxc publish c1 --alias=protected || false
-  lxc snapshot c1
-  lxc publish c1/snap0 --alias=protected
-  lxc image delete protected
-
-  lxc config set c1 security.privileged true
-  ! lxc start c1 || false
-  lxc config set c1 security.protection.shift false
-  lxc start c1
-  lxc stop c1 --force
-
-  lxc delete c1
-  lxc profile unset default security.protection.shift
+  if [ ! -e /sys/module/shiftfs/ ]; then
+    lxc init testimage c1
+    lxc start c1
+    lxc stop c1 --force
+
+    lxc profile set default security.protection.shift true
+    lxc start c1
+    lxc stop c1 --force
+
+    ! lxc publish c1 --alias=protected || false
+    lxc snapshot c1
+    lxc publish c1/snap0 --alias=protected
+    lxc image delete protected
+
+    lxc config set c1 security.privileged true
+    ! lxc start c1 || false
+    lxc config set c1 security.protection.shift false
+    lxc start c1
+    lxc stop c1 --force
+
+    lxc delete c1
+    lxc profile unset default security.protection.shift
+  fi
 }


More information about the lxc-devel mailing list