[lxc-devel] [lxd/master] VM: Failed startup cleanup
tomponline on Github
lxc-bot at linuxcontainers.org
Mon Jan 27 09:04:06 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 550 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200127/fcbe7f38/attachment.bin>
-------------- next part --------------
From b869287f958d4d53f1685a5f85a5d490dee98843 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 17:31:30 +0000
Subject: [PATCH 1/2] lxd/instance/drivers/qmp/monitor: Prevent crashes with
races closing closed channel
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/instance/drivers/qmp/monitor.go | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lxd/instance/drivers/qmp/monitor.go b/lxd/instance/drivers/qmp/monitor.go
index 79a2692519..4b96312228 100644
--- a/lxd/instance/drivers/qmp/monitor.go
+++ b/lxd/instance/drivers/qmp/monitor.go
@@ -154,7 +154,9 @@ func (m *Monitor) Wait() (chan struct{}, error) {
// Disconnect forces a disconnection from QEMU.
func (m *Monitor) Disconnect() {
// Stop all go routines and disconnect from socket.
- close(m.chDisconnect)
+ if !m.disconnected {
+ close(m.chDisconnect)
+ }
m.disconnected = true
m.qmp.Disconnect()
From a618982604575187f1a592783bfa5002fbbef817 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 17:32:06 +0000
Subject: [PATCH 2/2] lxd/instance/drivers/driver/qemu: Improve clean up on
start failure
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/instance/drivers/driver_qemu.go | 54 +++++++++++++++++++++++------
1 file changed, 43 insertions(+), 11 deletions(-)
diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index dd4196d9dc..ef0f5dc99b 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -378,7 +378,6 @@ func (vm *qemu) mount() (bool, error) {
// unmount the instance's config volume if needed.
func (vm *qemu) unmount() (bool, error) {
- var pool storagePools.Pool
pool, err := vm.getStoragePool()
if err != nil {
return false, err
@@ -591,15 +590,13 @@ func (vm *qemu) Start(stateful bool) error {
defer revert.Fail()
// Mount the instance's config volume.
- ourMount, err := vm.mount()
+ _, err = vm.mount()
if err != nil {
op.Done(err)
return err
}
- if ourMount {
- revert.Add(func() { vm.unmount() })
- }
+ revert.Add(func() { vm.unmount() })
err = vm.generateConfigShare()
if err != nil {
@@ -657,6 +654,16 @@ func (vm *qemu) Start(stateful bool) error {
continue
}
+ // Use a local function argument to ensure the current device is added to the reverter.
+ func(localDev deviceConfig.DeviceNamed) {
+ revert.Add(func() {
+ err := vm.deviceStop(localDev.Name, localDev.Config)
+ if err != nil {
+ logger.Errorf("Failed to cleanup device '%s': %v", localDev.Name, err)
+ }
+ })
+ }(dev)
+
devConfs = append(devConfs, runConf)
}
@@ -714,11 +721,13 @@ func (vm *qemu) Start(stateful bool) error {
err := filepath.Walk(filepath.Join(vm.Path(), "config"),
func(path string, info os.FileInfo, err error) error {
if err != nil {
+ op.Done(err)
return err
}
err = os.Chown(path, vm.state.OS.UnprivUID, -1)
if err != nil {
+ op.Done(err)
return err
}
@@ -749,7 +758,9 @@ func (vm *qemu) Start(stateful bool) error {
for _, file := range fdFiles {
f, err := os.OpenFile(file, os.O_RDWR, 0)
if err != nil {
- return errors.Wrapf(err, "Error opening exta file %q", file)
+ err = errors.Wrapf(err, "Error opening exta file %q", file)
+ op.Done(err)
+ return err
}
defer f.Close() // Close file after qemu has started.
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
@@ -762,6 +773,25 @@ func (vm *qemu) Start(stateful bool) error {
return err
}
+ pid, err := vm.pid()
+ if err != nil {
+ logger.Errorf(`Failed to get VM process ID "%d"`, pid)
+ return err
+ }
+
+ revert.Add(func() {
+ proc, err := os.FindProcess(pid)
+ if err != nil {
+ logger.Errorf(`Failed to find VM process "%d"`, pid)
+ return
+ }
+
+ proc.Kill()
+ if err != nil {
+ logger.Errorf(`Failed to kill VM process "%d"`, pid)
+ }
+ })
+
// Start QMP monitoring.
monitor, err := qmp.Connect(vm.getMonitorPath(), vm.getMonitorEventHandler())
if err != nil {
@@ -781,13 +811,17 @@ func (vm *qemu) Start(stateful bool) error {
// Record current state
err = tx.ContainerSetState(vm.id, "RUNNING")
if err != nil {
- return errors.Wrap(err, "Error updating instance state")
+ err = errors.Wrap(err, "Error updating instance state")
+ op.Done(err)
+ return err
}
// Update time instance last started time
err = tx.ContainerLastUsedUpdate(vm.id, time.Now().UTC())
if err != nil {
- return errors.Wrap(err, "Error updating instance last used")
+ err = errors.Wrap(err, "Error updating instance last used")
+ op.Done(err)
+ return err
}
return nil
@@ -797,9 +831,8 @@ func (vm *qemu) Start(stateful bool) error {
return err
}
- vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
-
revert.Success()
+ vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
return nil
}
@@ -938,7 +971,6 @@ func (vm *qemu) deviceStop(deviceName string, rawConfig deviceConfig.Device) err
canHotPlug, _ := d.CanHotPlug()
- // An empty netns path means we haven't been called from the LXC stop hook, so are running.
if vm.IsRunning() && !canHotPlug {
return fmt.Errorf("Device cannot be stopped when instance is running")
}
More information about the lxc-devel
mailing list