[lxc-devel] [lxd/master] add new restart operation lock to avoid emitting needless stop/start events

dlemel8 on Github lxc-bot at linuxcontainers.org
Wed Oct 28 14:42:15 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 314 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20201028/a9305b7e/attachment.bin>
-------------- next part --------------
From af146c569dd0d703a709bb06fd9c89dc4b9934de Mon Sep 17 00:00:00 2001
From: Daniel Segal <dlemel8 at gmail.com>
Date: Wed, 28 Oct 2020 16:41:26 +0200
Subject: [PATCH] add new restart operation lock to avoid emitting needless
 stop/start events

Signed-off-by: Daniel Segal <dlemel8 at gmail.com>
---
 lxd/instance/drivers/driver_lxc.go  | 111 ++++++++++++++++++----------
 lxd/instance/drivers/driver_qemu.go | 105 ++++++++++++++++++--------
 2 files changed, 146 insertions(+), 70 deletions(-)

diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index 29a3db459d..91ae9a08b7 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -2330,12 +2330,18 @@ func (c *lxc) detachInterfaceRename(netns string, ifName string, hostName string
 func (c *lxc) Start(stateful bool) error {
 	var ctxMap log.Ctx
 
-	// Setup a new operation
-	op, err := operationlock.Create(c.id, "start", false, false)
-	if err != nil {
-		return errors.Wrap(err, "Create container start operation")
+	// Get current or set a new operation
+	var err error
+	op := operationlock.Get(c.id)
+	if op == nil {
+		op, err = operationlock.Create(c.id, "start", false, false)
+		if err != nil {
+			return errors.Wrap(err, "Create container start operation")
+		}
+		defer op.Done(nil)
+	} else if op.Action() != "restart" {
+		return fmt.Errorf("Container is already running a %s operation", op.Action())
 	}
-	defer op.Done(nil)
 
 	if !daemon.SharedMountsSetup {
 		return fmt.Errorf("Daemon failed to setup shared mounts base: %v. Does security.nesting need to be turned on?", err)
@@ -2471,10 +2477,12 @@ func (c *lxc) Start(stateful bool) error {
 		return err
 	}
 
-	logger.Info("Started container", ctxMap)
-	c.state.Events.SendLifecycle(c.project, "container-started",
-		fmt.Sprintf("/1.0/containers/%s", c.name), nil)
+	if op.Action() != "restart" {
+		c.state.Events.SendLifecycle(c.project, "container-started",
+			fmt.Sprintf("/1.0/containers/%s", c.name), nil)
+	}
 
+	logger.Info("Started container", ctxMap)
 	return nil
 }
 
@@ -2591,10 +2599,16 @@ func (c *lxc) Stop(stateful bool) error {
 		return fmt.Errorf("The container is already stopped")
 	}
 
-	// Setup a new operation
-	op, err := operationlock.Create(c.id, "stop", false, true)
-	if err != nil {
-		return err
+	// Get current or set a new operation
+	var err error
+	op := operationlock.Get(c.id)
+	if op == nil {
+		op, err = operationlock.Create(c.id, "stop", false, true)
+		if err != nil {
+			return err
+		}
+	} else if op.Action() != "restart" {
+		return fmt.Errorf("Container is already running a %s operation", op.Action())
 	}
 
 	ctxMap = log.Ctx{
@@ -2639,10 +2653,12 @@ func (c *lxc) Stop(stateful bool) error {
 			return err
 		}
 
-		err = op.Wait()
-		if err != nil && c.IsRunning() {
-			logger.Error("Failed stopping container", ctxMap)
-			return err
+		if op.Action() != "restart" {
+			err = op.Wait()
+			if err != nil && c.IsRunning() {
+				logger.Error("Failed stopping container", ctxMap)
+				return err
+			}
 		}
 
 		c.stateful = true
@@ -2653,10 +2669,13 @@ func (c *lxc) Stop(stateful bool) error {
 			return err
 		}
 
-		op.Done(nil)
+		if op.Action() != "restart" {
+			op.Done(nil)
+			c.state.Events.SendLifecycle(c.project, "container-stopped",
+				fmt.Sprintf("/1.0/containers/%s", c.name), nil)
+		}
+
 		logger.Info("Stopped container", ctxMap)
-		c.state.Events.SendLifecycle(c.project, "container-stopped",
-			fmt.Sprintf("/1.0/containers/%s", c.name), nil)
 		return nil
 	} else if shared.PathExists(c.StatePath()) {
 		os.RemoveAll(c.StatePath())
@@ -2711,16 +2730,18 @@ func (c *lxc) Stop(stateful bool) error {
 		return err
 	}
 
-	err = op.Wait()
-	if err != nil && c.IsRunning() {
-		logger.Error("Failed stopping container", ctxMap)
-		return err
+	if op.Action() != "restart" {
+		err = op.Wait()
+		if err != nil && c.IsRunning() {
+			logger.Error("Failed stopping container", ctxMap)
+			return err
+		}
+
+		c.state.Events.SendLifecycle(c.project, "container-stopped",
+			fmt.Sprintf("/1.0/containers/%s", c.name), nil)
 	}
 
 	logger.Info("Stopped container", ctxMap)
-	c.state.Events.SendLifecycle(c.project, "container-stopped",
-		fmt.Sprintf("/1.0/containers/%s", c.name), nil)
-
 	return nil
 }
 
@@ -2733,10 +2754,16 @@ func (c *lxc) Shutdown(timeout time.Duration) error {
 		return fmt.Errorf("The container is already stopped")
 	}
 
-	// Setup a new operation
-	op, err := operationlock.Create(c.id, "stop", true, true)
-	if err != nil {
-		return err
+	// Get current or set a new operation
+	var err error
+	op := operationlock.Get(c.id)
+	if op == nil {
+		op, err = operationlock.Create(c.id, "stop", true, true)
+		if err != nil {
+			return err
+		}
+	} else if op.Action() != "restart" {
+		return fmt.Errorf("Container is already running a %s operation", op.Action())
 	}
 
 	ctxMap = log.Ctx{
@@ -2773,21 +2800,29 @@ func (c *lxc) Shutdown(timeout time.Duration) error {
 		return err
 	}
 
-	err = op.Wait()
-	if err != nil && c.IsRunning() {
-		logger.Error("Failed shutting down container", ctxMap)
-		return err
+	if op.Action() != "restart" {
+		err = op.Wait()
+		if err != nil && c.IsRunning() {
+			logger.Error("Failed shutting down container", ctxMap)
+			return err
+		}
+
+		c.state.Events.SendLifecycle(c.project, "container-shutdown",
+			fmt.Sprintf("/1.0/containers/%s", c.name), nil)
 	}
 
 	logger.Info("Shut down container", ctxMap)
-	c.state.Events.SendLifecycle(c.project, "container-shutdown",
-		fmt.Sprintf("/1.0/containers/%s", c.name), nil)
-
 	return nil
 }
 
 // Restart restart the instance.
 func (c *lxc) Restart(timeout time.Duration) error {
+	op, err := operationlock.Create(c.id, "restart", false, false)
+	if err != nil {
+		return err
+	}
+	defer op.Done(nil)
+
 	return c.common.Restart(c, timeout)
 }
 
@@ -2822,7 +2857,7 @@ func (c *lxc) onStop(args map[string]string) error {
 
 	// Pick up the existing stop operation lock created in Stop() function.
 	op := operationlock.Get(c.id)
-	if op != nil && op.Action() != "stop" {
+	if op != nil && op.Action() != "stop" && op.Action() != "restart" {
 		return fmt.Errorf("Container is already running a %s operation", op.Action())
 	}
 
diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index 80d303845b..04b39e4708 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -517,7 +517,7 @@ func (vm *qemu) onStop(target string) error {
 
 	// Pick up the existing stop operation lock created in Stop() function.
 	op := operationlock.Get(vm.id)
-	if op != nil && op.Action() != "stop" {
+	if op != nil && op.Action() != "stop" && op.Action() != "restart" {
 		return fmt.Errorf("Instance is already running a %s operation", op.Action())
 	}
 
@@ -568,10 +568,17 @@ func (vm *qemu) Shutdown(timeout time.Duration) error {
 		return fmt.Errorf("The instance is already stopped")
 	}
 
-	// Setup a new operation
-	op, err := operationlock.Create(vm.id, "stop", true, true)
-	if err != nil {
-		return err
+	// Get current or set a new operation
+	var err error
+	op := operationlock.Get(vm.id)
+	if op == nil {
+		op, err = operationlock.Create(vm.id, "stop", true, true)
+		if err != nil {
+			return err
+		}
+
+	} else if op.Action() != "restart" {
+		return fmt.Errorf("Container is already running a %s operation", op.Action())
 	}
 
 	// Connect to the monitor.
@@ -585,7 +592,9 @@ func (vm *qemu) Shutdown(timeout time.Duration) error {
 	chDisconnect, err := monitor.Wait()
 	if err != nil {
 		if err == qmp.ErrMonitorDisconnect {
-			op.Done(nil)
+			if op.Action() != "restart" {
+				op.Done(nil)
+			}
 			return nil
 		}
 
@@ -597,7 +606,9 @@ func (vm *qemu) Shutdown(timeout time.Duration) error {
 	err = monitor.Powerdown()
 	if err != nil {
 		if err == qmp.ErrMonitorDisconnect {
-			op.Done(nil)
+			if op.Action() != "restart" {
+				op.Done(nil)
+			}
 			return nil
 		}
 
@@ -618,19 +629,28 @@ func (vm *qemu) Shutdown(timeout time.Duration) error {
 		<-chDisconnect // Block until VM is not running if no timeout provided.
 	}
 
-	// Wait for onStop.
-	err = op.Wait()
-	if err != nil && vm.IsRunning() {
-		return err
+	if op.Action() != "restart" {
+		// Wait for onStop.
+		err = op.Wait()
+		if err != nil && vm.IsRunning() {
+			return err
+		}
+
+		op.Done(nil)
+		vm.state.Events.SendLifecycle(vm.project, "virtual-machine-shutdown", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
 	}
 
-	op.Done(nil)
-	vm.state.Events.SendLifecycle(vm.project, "virtual-machine-shutdown", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
 	return nil
 }
 
 // Restart restart the instance.
 func (vm *qemu) Restart(timeout time.Duration) error {
+	op, err := operationlock.Create(vm.id, "restart", false, false)
+	if err != nil {
+		return err
+	}
+	defer op.Done(nil)
+
 	return vm.common.Restart(vm, timeout)
 }
 
@@ -654,12 +674,17 @@ func (vm *qemu) Start(stateful bool) error {
 		return fmt.Errorf("The instance is already running")
 	}
 
-	// Setup a new operation
-	op, err := operationlock.Create(vm.id, "start", false, false)
-	if err != nil {
-		return errors.Wrap(err, "Create instance start operation")
+	// Get current or set a new operation
+	op := operationlock.Get(vm.id)
+	if op == nil {
+		op, err = operationlock.Create(vm.id, "start", false, false)
+		if err != nil {
+			return errors.Wrap(err, "Create instance start operation")
+		}
+		defer op.Done(nil)
+	} else if op.Action() != "restart" {
+		return fmt.Errorf("Container is already running a %s operation", op.Action())
 	}
-	defer op.Done(nil)
 
 	revert := revert.New()
 	defer revert.Fail()
@@ -1021,7 +1046,9 @@ func (vm *qemu) Start(stateful bool) error {
 	}
 
 	revert.Success()
-	vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
+	if op.Action() != "restart" {
+		vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
+	}
 	return nil
 }
 
@@ -2299,17 +2326,25 @@ func (vm *qemu) Stop(stateful bool) error {
 		return fmt.Errorf("Stateful stop isn't supported for VMs at this time")
 	}
 
-	// Setup a new operation.
-	op, err := operationlock.Create(vm.id, "stop", false, true)
-	if err != nil {
-		return err
+	// Get current or set a new operation
+	var err error
+	op := operationlock.Get(vm.id)
+	if op == nil {
+		op, err = operationlock.Create(vm.id, "stop", false, true)
+		if err != nil {
+			return err
+		}
+	} else if op.Action() != "restart" {
+		return fmt.Errorf("Container is already running a %s operation", op.Action())
 	}
 
 	// Connect to the monitor.
 	monitor, err := qmp.Connect(vm.monitorPath(), qemuSerialChardevName, vm.getMonitorEventHandler())
 	if err != nil {
 		// If we fail to connect, it's most likely because the VM is already off.
-		op.Done(nil)
+		if op.Action() != "restart" {
+			op.Done(nil)
+		}
 		return nil
 	}
 
@@ -2317,7 +2352,9 @@ func (vm *qemu) Stop(stateful bool) error {
 	chDisconnect, err := monitor.Wait()
 	if err != nil {
 		if err == qmp.ErrMonitorDisconnect {
-			op.Done(nil)
+			if op.Action() != "restart" {
+				op.Done(nil)
+			}
 			return nil
 		}
 
@@ -2329,7 +2366,9 @@ func (vm *qemu) Stop(stateful bool) error {
 	err = monitor.Quit()
 	if err != nil {
 		if err == qmp.ErrMonitorDisconnect {
-			op.Done(nil)
+			if op.Action() != "restart" {
+				op.Done(nil)
+			}
 			return nil
 		}
 
@@ -2340,13 +2379,15 @@ func (vm *qemu) Stop(stateful bool) error {
 	// Wait for QEMU to exit (can take a while if pending I/O).
 	<-chDisconnect
 
-	// Wait for onStop.
-	err = op.Wait()
-	if err != nil && vm.IsRunning() {
-		return err
-	}
+	if op.Action() != "restart" {
+		// Wait for onStop.
+		err = op.Wait()
+		if err != nil && vm.IsRunning() {
+			return err
+		}
 
-	vm.state.Events.SendLifecycle(vm.project, "virtual-machine-stopped", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
+		vm.state.Events.SendLifecycle(vm.project, "virtual-machine-stopped", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
+	}
 	return nil
 }
 


More information about the lxc-devel mailing list