[lxc-devel] [lxd/master] Container: Stop non-NIC devices after container fully stopped

tomponline on Github lxc-bot at linuxcontainers.org
Tue Nov 10 12:30:09 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 377 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20201110/d5bca0c0/attachment.bin>
-------------- next part --------------
From bf653a26f2c1251e67566736c841d894eaf8fed9 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 10 Nov 2020 12:21:46 +0000
Subject: [PATCH 1/2] lxd/instance/drivers/driver/lxc: Stop devices in two
 phases

- Continue stopping NICs in the OnStopNS hook.
- Stop all other devices in the OnStop hook after the container has been fully stopped.

This is to prevent issues unmounting disk devices when the container hasn't been stopped yet.

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/driver_lxc.go | 53 ++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index faf251a23e..282c4937b2 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -1578,7 +1578,9 @@ func (c *lxc) deviceUpdate(deviceName string, rawConfig deviceConfig.Device, old
 }
 
 // deviceStop loads a new device and calls its Stop() function.
-func (c *lxc) deviceStop(deviceName string, rawConfig deviceConfig.Device, stopHookNetnsPath string) error {
+// Accepts a stopHookNetnsPath argument which is required when run from the onStopNS hook before the
+// container's network namespace is unmounted (which is required for NIC device cleanup).
+func (c *lxc) deviceStop(deviceName string, rawConfig deviceConfig.Device, instanceRunning bool, stopHookNetnsPath string) error {
 	logger := logging.AddContext(logger.Log, log.Ctx{"device": deviceName, "type": rawConfig["type"], "project": c.Project(), "instance": c.Name()})
 	logger.Debug("Stopping device")
 
@@ -1604,7 +1606,7 @@ func (c *lxc) deviceStop(deviceName string, rawConfig deviceConfig.Device, stopH
 	canHotPlug, _ := d.CanHotPlug()
 
 	// An empty netns path means we haven't been called from the LXC stop hook, so are running.
-	if stopHookNetnsPath == "" && !canHotPlug {
+	if instanceRunning && !canHotPlug {
 		return fmt.Errorf("Device cannot be stopped when container is running")
 	}
 
@@ -1616,14 +1618,14 @@ func (c *lxc) deviceStop(deviceName string, rawConfig deviceConfig.Device, stopH
 	if runConf != nil {
 		// If network interface settings returned, then detach NIC from container.
 		if len(runConf.NetworkInterface) > 0 {
-			err = c.deviceDetachNIC(configCopy, runConf.NetworkInterface, stopHookNetnsPath)
+			err = c.deviceDetachNIC(configCopy, runConf.NetworkInterface, instanceRunning, stopHookNetnsPath)
 			if err != nil {
 				return err
 			}
 		}
 
 		// Add cgroup rules if requested and container is running.
-		if len(runConf.CGroups) > 0 && stopHookNetnsPath == "" {
+		if len(runConf.CGroups) > 0 && instanceRunning {
 			err = c.deviceAddCgroupRules(runConf.CGroups)
 			if err != nil {
 				return err
@@ -1631,7 +1633,7 @@ func (c *lxc) deviceStop(deviceName string, rawConfig deviceConfig.Device, stopH
 		}
 
 		// Detach mounts if requested and container is running.
-		if len(runConf.Mounts) > 0 && stopHookNetnsPath == "" {
+		if len(runConf.Mounts) > 0 && instanceRunning {
 			err = c.deviceHandleMounts(runConf.Mounts)
 			if err != nil {
 				return err
@@ -1649,7 +1651,9 @@ func (c *lxc) deviceStop(deviceName string, rawConfig deviceConfig.Device, stopH
 }
 
 // deviceDetachNIC detaches a NIC device from a container.
-func (c *lxc) deviceDetachNIC(configCopy map[string]string, netIF []deviceConfig.RunConfigItem, stopHookNetnsPath string) error {
+// Accepts a stopHookNetnsPath argument which is required when run from the onStopNS hook before the
+// container's network namespace is unmounted (which is required for NIC device cleanup).
+func (c *lxc) deviceDetachNIC(configCopy map[string]string, netIF []deviceConfig.RunConfigItem, instanceRunning bool, stopHookNetnsPath string) error {
 	// Get requested device name to detach interface back to on the host.
 	devName := ""
 	for _, dev := range netIF {
@@ -1664,7 +1668,7 @@ func (c *lxc) deviceDetachNIC(configCopy map[string]string, netIF []deviceConfig
 	}
 
 	// If container is running, perform live detach of interface back to host.
-	if stopHookNetnsPath == "" {
+	if instanceRunning {
 		// For some reason, having network config confuses detach, so get our own go-lxc struct.
 		cname := project.Instance(c.Project(), c.Name())
 		cc, err := liblxc.NewContainer(cname, c.state.OS.LxcPath)
@@ -1686,9 +1690,13 @@ func (c *lxc) deviceDetachNIC(configCopy map[string]string, netIF []deviceConfig
 
 		err = cc.DetachInterfaceRename(configCopy["name"], devName)
 		if err != nil {
-			return errors.Wrapf(err, "Failed to detach interface: %s to %s", configCopy["name"], devName)
+			return errors.Wrapf(err, "Failed to detach interface: %q to %q", configCopy["name"], devName)
 		}
 	} else {
+		if stopHookNetnsPath != "" {
+			return fmt.Errorf("Cannot detach NIC device %q without stopHookNetnsPath being provided", configCopy["name"])
+		}
+
 		// Currently liblxc does not move devices back to the host on stop that were added
 		// after the the container was started. For this reason we utilise the lxc.hook.stop
 		// hook so that we can capture the netns path, enter the namespace and move the nics
@@ -1700,7 +1708,7 @@ func (c *lxc) deviceDetachNIC(configCopy map[string]string, netIF []deviceConfig
 		if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", devName)) {
 			err := c.detachInterfaceRename(stopHookNetnsPath, configCopy["name"], devName)
 			if err != nil {
-				return errors.Wrapf(err, "Failed to detach interface: %s to %s", configCopy["name"], devName)
+				return errors.Wrapf(err, "Failed to detach interface: %q to %q", configCopy["name"], devName)
 			}
 		}
 	}
@@ -2141,7 +2149,7 @@ func (c *lxc) startCommon() (string, []func() error, error) {
 		// Stop device on failure to setup container, pass non-empty stopHookNetnsPath so that stop code
 		// doesn't think container is running.
 		revert.Add(func() {
-			err := c.deviceStop(dev.Name, dev.Config, "startfailed")
+			err := c.deviceStop(dev.Name, dev.Config, false, "")
 			if err != nil {
 				logger.Errorf("Failed to cleanup device %q: %v", dev.Name, err)
 			}
@@ -2828,14 +2836,14 @@ func (c *lxc) onStopNS(args map[string]string) error {
 	target := args["target"]
 	netns := args["netns"]
 
-	// Validate target
+	// Validate target.
 	if !shared.StringInSlice(target, []string{"stop", "reboot"}) {
 		logger.Error("Container sent invalid target to OnStopNS", log.Ctx{"container": c.Name(), "target": target})
-		return fmt.Errorf("Invalid stop target: %s", target)
+		return fmt.Errorf("Invalid stop target %q", target)
 	}
 
 	// Clean up devices.
-	c.cleanupDevices(netns)
+	c.cleanupDevices(false, netns)
 
 	return nil
 }
@@ -2860,6 +2868,9 @@ func (c *lxc) onStop(args map[string]string) error {
 	// Make sure we can't call go-lxc functions by mistake
 	c.fromHook = true
 
+	// Clean up devices.
+	c.cleanupDevices(false, "")
+
 	// Remove directory ownership (to avoid issue if uidmap is re-used)
 	err := os.Chown(c.Path(), 0, 0)
 	if err != nil {
@@ -2968,14 +2979,22 @@ func (c *lxc) onStop(args map[string]string) error {
 }
 
 // cleanupDevices performs any needed device cleanup steps when container is stopped.
-func (c *lxc) cleanupDevices(netns string) {
+// Accepts a stopHookNetnsPath argument which is required when run from the onStopNS hook before the
+// container's network namespace is unmounted (which is required for NIC device cleanup).
+func (c *lxc) cleanupDevices(instanceRunning bool, stopHookNetnsPath string) {
 	for _, dev := range c.expandedDevices.Reversed() {
+		// Only stop NIC devices when run from the onStopNS hook, and stop all other devices when run from
+		// the onStop hook. This way disk devices are stopped after the instance has been fully stopped.
+		if (stopHookNetnsPath != "" && dev.Config["type"] != "nic") || (stopHookNetnsPath == "" && dev.Config["type"] == "nic") {
+			continue
+		}
+
 		// Use the device interface if device supports it.
-		err := c.deviceStop(dev.Name, dev.Config, netns)
+		err := c.deviceStop(dev.Name, dev.Config, instanceRunning, stopHookNetnsPath)
 		if err == device.ErrUnsupportedDevType {
 			continue
 		} else if err != nil {
-			logger.Errorf("Failed to stop device '%s': %v", dev.Name, err)
+			logger.Errorf("Failed to stop device %q: %v", dev.Name, err)
 		}
 	}
 }
@@ -4572,7 +4591,7 @@ func (c *lxc) updateDevices(removeDevices deviceConfig.Devices, addDevices devic
 	// Remove devices in reverse order to how they were added.
 	for _, dev := range removeDevices.Reversed() {
 		if isRunning {
-			err := c.deviceStop(dev.Name, dev.Config, "")
+			err := c.deviceStop(dev.Name, dev.Config, isRunning, "")
 			if err == device.ErrUnsupportedDevType {
 				continue // No point in trying to remove device below.
 			} else if err != nil {

From 9071005fd996e2da083aa764a4e9e753a161d3a3 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 10 Nov 2020 12:23:25 +0000
Subject: [PATCH 2/2] lxd/device/disk: Removes workaround for ceph disks now
 that disks are stopped after instance is stopped

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/disk.go | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/lxd/device/disk.go b/lxd/device/disk.go
index badf7b80a1..d303a5d02e 100644
--- a/lxd/device/disk.go
+++ b/lxd/device/disk.go
@@ -1293,16 +1293,10 @@ func (d *disk) postStop() error {
 
 	if strings.HasPrefix(d.config["source"], "ceph:") {
 		v := d.volatileGet()
-
-		// Run unmap async. This is needed as postStop is called from
-		// within a monitor hook, meaning that as we process this, the monitor
-		// process is still running, holding some references.
-		go func() {
-			err := diskCephRbdUnmap(v["ceph_rbd"])
-			if err != nil {
-				logger.Errorf("Failed to unmap RBD volume %q for %q: %v", v["ceph_rbd"], project.Instance(d.inst.Project(), d.inst.Name()), err)
-			}
-		}()
+		err := diskCephRbdUnmap(v["ceph_rbd"])
+		if err != nil {
+			logger.Errorf("Failed to unmap RBD volume %q for %q: %v", v["ceph_rbd"], project.Instance(d.inst.Project(), d.inst.Name()), err)
+		}
 	}
 
 	return nil


More information about the lxc-devel mailing list