[lxc-devel] [lxd/master] VM: Failed startup cleanup

tomponline on Github lxc-bot at linuxcontainers.org
Mon Jan 27 09:04:06 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 550 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200127/fcbe7f38/attachment.bin>
-------------- next part --------------
From b869287f958d4d53f1685a5f85a5d490dee98843 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 17:31:30 +0000
Subject: [PATCH 1/2] lxd/instance/drivers/qmp/monitor: Prevent crashes with
 races closing closed channel

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/qmp/monitor.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lxd/instance/drivers/qmp/monitor.go b/lxd/instance/drivers/qmp/monitor.go
index 79a2692519..4b96312228 100644
--- a/lxd/instance/drivers/qmp/monitor.go
+++ b/lxd/instance/drivers/qmp/monitor.go
@@ -154,7 +154,9 @@ func (m *Monitor) Wait() (chan struct{}, error) {
 // Disconnect forces a disconnection from QEMU.
 func (m *Monitor) Disconnect() {
 	// Stop all go routines and disconnect from socket.
-	close(m.chDisconnect)
+	if !m.disconnected {
+		close(m.chDisconnect)
+	}
 	m.disconnected = true
 	m.qmp.Disconnect()
 

From a618982604575187f1a592783bfa5002fbbef817 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 17:32:06 +0000
Subject: [PATCH 2/2] lxd/instance/drivers/driver/qemu: Improve clean up on
 start failure

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/driver_qemu.go | 54 +++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index dd4196d9dc..ef0f5dc99b 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -378,7 +378,6 @@ func (vm *qemu) mount() (bool, error) {
 
 // unmount the instance's config volume if needed.
 func (vm *qemu) unmount() (bool, error) {
-	var pool storagePools.Pool
 	pool, err := vm.getStoragePool()
 	if err != nil {
 		return false, err
@@ -591,15 +590,13 @@ func (vm *qemu) Start(stateful bool) error {
 	defer revert.Fail()
 
 	// Mount the instance's config volume.
-	ourMount, err := vm.mount()
+	_, err = vm.mount()
 	if err != nil {
 		op.Done(err)
 		return err
 	}
 
-	if ourMount {
-		revert.Add(func() { vm.unmount() })
-	}
+	revert.Add(func() { vm.unmount() })
 
 	err = vm.generateConfigShare()
 	if err != nil {
@@ -657,6 +654,16 @@ func (vm *qemu) Start(stateful bool) error {
 			continue
 		}
 
+		// Use a local function argument to ensure the current device is added to the reverter.
+		func(localDev deviceConfig.DeviceNamed) {
+			revert.Add(func() {
+				err := vm.deviceStop(localDev.Name, localDev.Config)
+				if err != nil {
+					logger.Errorf("Failed to cleanup device '%s': %v", localDev.Name, err)
+				}
+			})
+		}(dev)
+
 		devConfs = append(devConfs, runConf)
 	}
 
@@ -714,11 +721,13 @@ func (vm *qemu) Start(stateful bool) error {
 		err := filepath.Walk(filepath.Join(vm.Path(), "config"),
 			func(path string, info os.FileInfo, err error) error {
 				if err != nil {
+					op.Done(err)
 					return err
 				}
 
 				err = os.Chown(path, vm.state.OS.UnprivUID, -1)
 				if err != nil {
+					op.Done(err)
 					return err
 				}
 
@@ -749,7 +758,9 @@ func (vm *qemu) Start(stateful bool) error {
 	for _, file := range fdFiles {
 		f, err := os.OpenFile(file, os.O_RDWR, 0)
 		if err != nil {
-			return errors.Wrapf(err, "Error opening exta file %q", file)
+			err = errors.Wrapf(err, "Error opening exta file %q", file)
+			op.Done(err)
+			return err
 		}
 		defer f.Close() // Close file after qemu has started.
 		cmd.ExtraFiles = append(cmd.ExtraFiles, f)
@@ -762,6 +773,25 @@ func (vm *qemu) Start(stateful bool) error {
 		return err
 	}
 
+	pid, err := vm.pid()
+	if err != nil {
+		logger.Errorf(`Failed to get VM process ID "%d"`, pid)
+		return err
+	}
+
+	revert.Add(func() {
+		proc, err := os.FindProcess(pid)
+		if err != nil {
+			logger.Errorf(`Failed to find VM process "%d"`, pid)
+			return
+		}
+
+		proc.Kill()
+		if err != nil {
+			logger.Errorf(`Failed to kill VM process "%d"`, pid)
+		}
+	})
+
 	// Start QMP monitoring.
 	monitor, err := qmp.Connect(vm.getMonitorPath(), vm.getMonitorEventHandler())
 	if err != nil {
@@ -781,13 +811,17 @@ func (vm *qemu) Start(stateful bool) error {
 		// Record current state
 		err = tx.ContainerSetState(vm.id, "RUNNING")
 		if err != nil {
-			return errors.Wrap(err, "Error updating instance state")
+			err = errors.Wrap(err, "Error updating instance state")
+			op.Done(err)
+			return err
 		}
 
 		// Update time instance last started time
 		err = tx.ContainerLastUsedUpdate(vm.id, time.Now().UTC())
 		if err != nil {
-			return errors.Wrap(err, "Error updating instance last used")
+			err = errors.Wrap(err, "Error updating instance last used")
+			op.Done(err)
+			return err
 		}
 
 		return nil
@@ -797,9 +831,8 @@ func (vm *qemu) Start(stateful bool) error {
 		return err
 	}
 
-	vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
-
 	revert.Success()
+	vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
 	return nil
 }
 
@@ -938,7 +971,6 @@ func (vm *qemu) deviceStop(deviceName string, rawConfig deviceConfig.Device) err
 
 	canHotPlug, _ := d.CanHotPlug()
 
-	// An empty netns path means we haven't been called from the LXC stop hook, so are running.
 	if vm.IsRunning() && !canHotPlug {
 		return fmt.Errorf("Device cannot be stopped when instance is running")
 	}


More information about the lxc-devel mailing list