[lxc-devel] [lxd/master] Improve instance operations and fully implement restarted event

stgraber on Github lxc-bot at linuxcontainers.org
Thu Nov 26 00:52:45 UTC 2020


From ab85b3dcc245dfbcfc326bbf88fb9a13061133b1 Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:44:25 -0500
Subject: [PATCH 1/6] lxd/instance/operations: Allow Wait/Done on nil struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/instance/operationlock/operationlock.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/lxd/instance/operationlock/operationlock.go b/lxd/instance/operationlock/operationlock.go
index 6b4a04c485..c9db4ba432 100644
--- a/lxd/instance/operationlock/operationlock.go
+++ b/lxd/instance/operationlock/operationlock.go
@@ -86,6 +86,10 @@ func (op *InstanceOperation) Reset() error {
 
 // Wait waits for an operation to finish.
 func (op *InstanceOperation) Wait() error {
+	if op == nil {
+		return nil
+	}
+
 	<-op.chanDone
 
 	return op.err
@@ -96,6 +100,11 @@ func (op *InstanceOperation) Done(err error) {
 	instanceOperationsLock.Lock()
 	defer instanceOperationsLock.Unlock()
 
+	// This function can be called on a nil struct.
+	if op == nil {
+		return
+	}
+
 	// Check if already done
 	runningOp, ok := instanceOperations[op.id]
 	if !ok || runningOp != op {
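
Note: the nil checks above work because Go allows a method to be called on a
nil pointer receiver, provided the method guards against dereferencing it. A
minimal standalone sketch of the pattern (illustrative names, not LXD code):

package main

import "fmt"

type lock struct {
	done chan struct{}
	err  error
}

// Wait is safe on a nil receiver, so callers can write Get(id).Wait()
// without first checking whether an operation actually exists.
func (l *lock) Wait() error {
	if l == nil {
		return nil
	}
	<-l.done
	return l.err
}

func main() {
	var l *lock           // no operation in progress
	fmt.Println(l.Wait()) // prints <nil> instead of panicking
}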

From 5084fb9f6165af18c32777fedd13df01900cd8ef Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:48:12 -0500
Subject: [PATCH 2/6] lxd/instance/lxc: Improve use of operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/instance/drivers/driver_lxc.go | 104 ++++++++++++-----------------
 1 file changed, 42 insertions(+), 62 deletions(-)

diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index ab7eeba24c..c5ecf02179 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -2227,12 +2227,15 @@ func (d *lxc) Start(stateful bool) error {
 	defer op.Done(nil)
 
 	if !daemon.SharedMountsSetup {
-		return fmt.Errorf("Daemon failed to setup shared mounts base: %v. Does security.nesting need to be turned on?", err)
+		err = fmt.Errorf("Daemon failed to setup shared mounts base. Does security.nesting need to be turned on?")
+		op.Done(err)
+		return err
 	}
 
 	// Run the shared start code
 	configPath, postStartHooks, err := d.startCommon()
 	if err != nil {
+		op.Done(err)
 		return errors.Wrap(err, "Failed preparing container for start")
 	}
 
@@ -2250,7 +2253,9 @@ func (d *lxc) Start(stateful bool) error {
 	// If stateful, restore now
 	if stateful {
 		if !d.stateful {
-			return fmt.Errorf("Container has no existing state to restore")
+			err = fmt.Errorf("Container has no existing state to restore")
+			op.Done(err)
+			return err
 		}
 
 		criuMigrationArgs := instance.CriuMigrationArgs{
@@ -2265,6 +2270,7 @@ func (d *lxc) Start(stateful bool) error {
 
 		err := d.Migrate(&criuMigrationArgs)
 		if err != nil && !d.IsRunning() {
+			op.Done(err)
 			return errors.Wrap(err, "Migrate")
 		}
 
@@ -2273,7 +2279,7 @@ func (d *lxc) Start(stateful bool) error {
 
 		err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, false)
 		if err != nil {
-			logger.Error("Failed starting container", ctxMap)
+			op.Done(err)
 			return errors.Wrap(err, "Start container")
 		}
 
@@ -2281,8 +2287,9 @@ func (d *lxc) Start(stateful bool) error {
 		err = d.runHooks(postStartHooks)
 		if err != nil {
 			// Attempt to stop container.
-			op.Done(err)
 			d.Stop(false)
+
+			op.Done(err)
 			return err
 		}
 
@@ -2292,12 +2299,14 @@ func (d *lxc) Start(stateful bool) error {
 		/* stateless start required when we have state, let's delete it */
 		err := os.RemoveAll(d.StatePath())
 		if err != nil {
+			op.Done(err)
 			return err
 		}
 
 		d.stateful = false
 		err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, false)
 		if err != nil {
+			op.Done(err)
 			return errors.Wrap(err, "Persist stateful flag")
 		}
 	}
@@ -2342,6 +2351,7 @@ func (d *lxc) Start(stateful bool) error {
 		logger.Error("Failed starting container", ctxMap)
 
 		// Return the actual error
+		op.Done(err)
 		return err
 	}
 
@@ -2349,8 +2359,9 @@ func (d *lxc) Start(stateful bool) error {
 	err = d.runHooks(postStartHooks)
 	if err != nil {
 		// Attempt to stop container.
-		op.Done(err)
 		d.Stop(false)
+
+		op.Done(err)
 		return err
 	}
 
@@ -2482,7 +2493,6 @@ func (d *lxc) Stop(stateful bool) error {
 		err := os.MkdirAll(stateDir, 0700)
 		if err != nil {
 			op.Done(err)
-			logger.Error("Failed stopping container", ctxMap)
 			return err
 		}
 
@@ -2500,28 +2510,25 @@ func (d *lxc) Stop(stateful bool) error {
 		err = d.Migrate(&criuMigrationArgs)
 		if err != nil {
 			op.Done(err)
-			logger.Error("Failed stopping container", ctxMap)
 			return err
 		}
 
 		err = op.Wait()
 		if err != nil && d.IsRunning() {
-			logger.Error("Failed stopping container", ctxMap)
 			return err
 		}
 
 		d.stateful = true
 		err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, true)
 		if err != nil {
-			op.Done(err)
 			logger.Error("Failed stopping container", ctxMap)
 			return err
 		}
 
-		op.Done(nil)
 		logger.Info("Stopped container", ctxMap)
 		d.state.Events.SendLifecycle(d.project, "container-stopped",
 			fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+
 		return nil
 	} else if shared.PathExists(d.StatePath()) {
 		os.RemoveAll(d.StatePath())
@@ -2532,14 +2539,12 @@ func (d *lxc) Stop(stateful bool) error {
 		err = d.initLXC(true)
 		if err != nil {
 			op.Done(err)
-			logger.Error("Failed stopping container", ctxMap)
 			return err
 		}
 	} else {
 		err = d.initLXC(false)
 		if err != nil {
 			op.Done(err)
-			logger.Error("Failed stopping container", ctxMap)
 			return err
 		}
 	}
@@ -2570,15 +2575,14 @@ func (d *lxc) Stop(stateful bool) error {
 		}
 	}
 
-	if err := d.c.Stop(); err != nil {
+	err = d.c.Stop()
+	if err != nil {
 		op.Done(err)
-		logger.Error("Failed stopping container", ctxMap)
 		return err
 	}
 
 	err = op.Wait()
 	if err != nil && d.IsRunning() {
-		logger.Error("Failed stopping container", ctxMap)
 		return err
 	}
 
@@ -2620,27 +2624,24 @@ func (d *lxc) Shutdown(timeout time.Duration) error {
 		err = d.initLXC(true)
 		if err != nil {
 			op.Done(err)
-			logger.Error("Failed stopping container", ctxMap)
 			return err
 		}
 	} else {
 		err = d.initLXC(false)
 		if err != nil {
 			op.Done(err)
-			logger.Error("Failed stopping container", ctxMap)
 			return err
 		}
 	}
 
-	if err := d.c.Shutdown(timeout); err != nil {
+	err = d.c.Shutdown(timeout)
+	if err != nil {
 		op.Done(err)
-		logger.Error("Failed shutting down container", ctxMap)
 		return err
 	}
 
 	err = op.Wait()
 	if err != nil && d.IsRunning() {
-		logger.Error("Failed shutting down container", ctxMap)
 		return err
 	}
 
@@ -2705,13 +2706,15 @@ func (d *lxc) onStop(args map[string]string) error {
 		"stateful":  false}
 
 	if op == nil {
-		logger.Info(fmt.Sprintf("Container initiated %s", target), ctxMap)
+		logger.Debug(fmt.Sprintf("Container initiated %s", target), ctxMap)
 	}
 
 	// Record power state
 	err := d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED")
 	if err != nil {
-		logger.Error("Failed to set container state", log.Ctx{"container": d.Name(), "err": err})
+		err = errors.Wrap(err, "Failed to set container state")
+		op.Done(err)
+		return err
 	}
 
 	go func(d *lxc, target string, op *operationlock.InstanceOperation) {
@@ -2719,13 +2722,11 @@ func (d *lxc) onStop(args map[string]string) error {
 		err = nil
 
 		// Unlock on return
-		if op != nil {
-			defer op.Done(err)
-		}
+		defer op.Done(nil)
 
 		// Wait for other post-stop actions to be done and the container actually stopping.
 		d.IsRunning()
-		logger.Debug("Container stopped, starting storage cleanup", log.Ctx{"container": d.Name()})
+		logger.Debug("Container stopped, cleaning up", log.Ctx{"container": d.Name()})
 
 		// Clean up devices.
 		d.cleanupDevices(false, "")
@@ -2733,56 +2734,42 @@ func (d *lxc) onStop(args map[string]string) error {
 		// Remove directory ownership (to avoid issue if uidmap is re-used)
 		err := os.Chown(d.Path(), 0, 0)
 		if err != nil {
-			if op != nil {
-				op.Done(err)
-			}
-
-			logger.Error("Failed clearing ownernship", log.Ctx{"container": d.Name(), "err": err, "path": d.Path()})
+			op.Done(errors.Wrap(err, "Failed clearing ownership"))
+			return
 		}
 
 		err = os.Chmod(d.Path(), 0100)
 		if err != nil {
-			if op != nil {
-				op.Done(err)
-			}
-
-			logger.Error("Failed clearing permissions", log.Ctx{"container": d.Name(), "err": err, "path": d.Path()})
+			op.Done(errors.Wrap(err, "Failed clearing permissions"))
+			return
 		}
 
 		// Stop the storage for this container
 		_, err = d.unmount()
 		if err != nil {
-			if op != nil {
-				op.Done(err)
-			}
-
-			logger.Error("Failed unnounting container", log.Ctx{"container": d.Name(), "err": err})
+			op.Done(errors.Wrap(err, "Failed unmounting container"))
+			return
 		}
 
 		// Unload the apparmor profile
 		err = apparmor.InstanceUnload(d.state, d)
 		if err != nil {
-			logger.Error("Failed to destroy apparmor namespace", log.Ctx{"container": d.Name(), "err": err})
+			op.Done(errors.Wrap(err, "Failed to destroy apparmor namespace"))
+			return
 		}
 
 		// Clean all the unix devices
 		err = d.removeUnixDevices()
 		if err != nil {
-			if op != nil {
-				op.Done(err)
-			}
-
-			logger.Error("Unable to remove unix devices", log.Ctx{"container": d.Name(), "err": err})
+			op.Done(errors.Wrap(err, "Failed to remove unix devices"))
+			return
 		}
 
 		// Clean all the disk devices
 		err = d.removeDiskDevices()
 		if err != nil {
-			if op != nil {
-				op.Done(err)
-			}
-
-			logger.Error("Unable to remove disk devices", log.Ctx{"container": d.Name(), "err": err})
+			op.Done(errors.Wrap(err, "Failed to remove disk devices"))
+			return
 		}
 
 		// Log and emit lifecycle if not user triggered
@@ -2796,11 +2783,7 @@ func (d *lxc) onStop(args map[string]string) error {
 			// Start the container again
 			err = d.Start(false)
 			if err != nil {
-				if op != nil {
-					op.Done(err)
-				}
-
-				logger.Error("Failed restarting container", log.Ctx{"container": d.Name(), "err": err})
+				op.Done(errors.Wrap(err, "Failed restarting container"))
 				return
 			}
 
@@ -2815,11 +2798,8 @@ func (d *lxc) onStop(args map[string]string) error {
 		if d.ephemeral {
 			err = d.Delete()
 			if err != nil {
-				if op != nil {
-					op.Done(err)
-				}
-
-				logger.Error("Failed deleting ephemeral", log.Ctx{"container": d.Name(), "err": err})
+				op.Done(errors.Wrap(err, "Failed deleting ephemeral container"))
+				return
 			}
 		}
 	}(d, target, op)

From 80763eba205580b8dc9dc989611121227f72926f Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:48:44 -0500
Subject: [PATCH 3/6] lxd/instance/lxc: Improve locking on file ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/instance/drivers/driver_lxc.go | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index c5ecf02179..08b5758128 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -4980,6 +4980,9 @@ func (d *lxc) inheritInitPidFd() (int, *os.File) {
 
 // FileExists returns whether file exists inside instance.
 func (d *lxc) FileExists(path string) error {
+	// Check for ongoing operations (that may involve shifting).
+	operationlock.Get(d.id).Wait()
+
 	// Setup container storage if needed
 	_, err := d.mount()
 	if err != nil {
@@ -5026,10 +5029,7 @@ func (d *lxc) FileExists(path string) error {
 // FilePull gets a file from the instance.
 func (d *lxc) FilePull(srcpath string, dstpath string) (int64, int64, os.FileMode, string, []string, error) {
 	// Check for ongoing operations (that may involve shifting).
-	op := operationlock.Get(d.id)
-	if op != nil {
-		op.Wait()
-	}
+	operationlock.Get(d.id).Wait()
 
 	// Setup container storage if needed
 	_, err := d.mount()
@@ -5152,10 +5152,7 @@ func (d *lxc) FilePull(srcpath string, dstpath string) (int64, int64, os.FileMod
 // FilePush sends a file into the instance.
 func (d *lxc) FilePush(fileType string, srcpath string, dstpath string, uid int64, gid int64, mode int, write string) error {
 	// Check for ongoing operations (that may involve shifting).
-	op := operationlock.Get(d.id)
-	if op != nil {
-		op.Wait()
-	}
+	operationlock.Get(d.id).Wait()
 
 	var rootUID int64
 	var rootGID int64
@@ -5244,6 +5241,9 @@ func (d *lxc) FilePush(fileType string, srcpath string, dstpath string, uid int6
 
 // FileRemove removes a file inside the instance.
 func (d *lxc) FileRemove(path string) error {
+	// Check for ongoing operations (that may involve shifting).
+	operationlock.Get(d.id).Wait()
+
 	var errStr string
 
 	// Setup container storage if needed

From 9a6f8f6d6f772149e1a72fb3a790065eab2fb047 Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:50:25 -0500
Subject: [PATCH 4/6] lxd/instance/operations: Introduce CreateWaitGet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/instance/operationlock/operationlock.go | 39 +++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/lxd/instance/operationlock/operationlock.go b/lxd/instance/operationlock/operationlock.go
index c9db4ba432..e55f04dffc 100644
--- a/lxd/instance/operationlock/operationlock.go
+++ b/lxd/instance/operationlock/operationlock.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"sync"
 	"time"
+
+	"github.com/lxc/lxd/shared"
 )
 
 var instanceOperationsLock sync.Mutex
@@ -66,6 +68,43 @@ func Create(instanceID int, action string, reusable bool, reuse bool) (*Instance
 	return op, nil
 }
 
+// CreateWaitGet is a weird function which does what we happen to want most of the time.
+//
+// If the instance has an operation of the same type and it's not reusable
+// or the caller doesn't want to reuse it, the function will wait and
+// indicate that it did so.
+//
+// If the instance has an operation of one of the alternate types, then
+// the operation is returned to the user.
+//
+// If the instance doesn't have an operation, has an operation of a different
+// type that is not in the alternate list or has the right type and is
+// being reused, then this behaves as a Create call.
+func CreateWaitGet(instanceID int, action string, altActions []string, reusable bool, reuse bool) (bool, *InstanceOperation, error) {
+	op := Get(instanceID)
+
+	// No existing operation, call create.
+	if op == nil {
+		op, err := Create(instanceID, action, reusable, reuse)
+		return false, op, err
+	}
+
+	// Operation matches and not reusable or asked to reuse, wait.
+	if op.action == action && (!reuse || !op.reusable) {
+		err := op.Wait()
+		return true, nil, err
+	}
+
+	// Operation matches one of the alternate actions, return the operation.
+	if shared.StringInSlice(op.action, altActions) {
+		return false, op, nil
+	}
+
+	// Send the rest to Create
+	op, err := Create(instanceID, action, reusable, reuse)
+	return false, op, err
+}
+
 // Get retrieves an existing lock or returns nil if no lock exists.
 func Get(instanceID int) *InstanceOperation {
 	instanceOperationsLock.Lock()
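
The boolean returned by CreateWaitGet tells the caller whether it waited on an
existing operation rather than creating one. A hedged sketch of the call-site
shape the driver patches below adopt, assuming the post-series operationlock
package (the stop function itself is illustrative, not LXD code):

package example

import (
	"github.com/lxc/lxd/lxd/instance/operationlock"
)

func stop(instanceID int) error {
	// Create a "stop" operation, wait on a matching in-flight "stop",
	// or pick up an in-flight "restart" and participate in it.
	exists, op, err := operationlock.CreateWaitGet(instanceID, "stop", []string{"restart"}, false, true)
	if err != nil {
		return err
	}
	if exists {
		// A matching operation was already running and has now finished.
		return nil
	}

	// ... perform the actual stop, calling op.Done(err) and returning on failure ...

	op.Done(nil)
	return nil
}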

From c001bbb6e71d61f802599448edec71214e6d258d Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:50:49 -0500
Subject: [PATCH 5/6] lxd/instance: Introduce restart tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #8010
Closes #8090

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
Signed-off-by: Daniel Segal <dlemel8 at gmail.com>
---
 lxd/instance/drivers/driver_common.go |  27 ++++-
 lxd/instance/drivers/driver_lxc.go    | 118 +++++++++++++++-----
 lxd/instance/drivers/driver_qemu.go   | 148 ++++++++++++++++++--------
 3 files changed, 217 insertions(+), 76 deletions(-)

diff --git a/lxd/instance/drivers/driver_common.go b/lxd/instance/drivers/driver_common.go
index 395a200cf7..ecb5d9bf55 100644
--- a/lxd/instance/drivers/driver_common.go
+++ b/lxd/instance/drivers/driver_common.go
@@ -14,6 +14,7 @@ import (
 	"github.com/lxc/lxd/lxd/device/nictype"
 	"github.com/lxc/lxd/lxd/instance"
 	"github.com/lxc/lxd/lxd/instance/instancetype"
+	"github.com/lxc/lxd/lxd/instance/operationlock"
 	"github.com/lxc/lxd/lxd/operations"
 	"github.com/lxc/lxd/lxd/project"
 	"github.com/lxc/lxd/lxd/state"
@@ -430,23 +431,45 @@ func (d *common) expandDevices(profiles []api.Profile) error {
 
 // restart handles instance restarts.
 func (d *common) restart(inst instance.Instance, timeout time.Duration) error {
+	// Setup a new operation for the stop/shutdown phase.
+	op, err := operationlock.Create(d.id, "restart", true, true)
+	if err != nil {
+		return errors.Wrap(err, "Create restart operation")
+	}
+
 	if timeout == 0 {
 		err := inst.Stop(false)
 		if err != nil {
+			op.Done(err)
 			return err
 		}
 	} else {
 		if inst.IsFrozen() {
-			return errors.New("Instance is not running")
+			err = fmt.Errorf("Instance is not running")
+			op.Done(err)
+			return err
 		}
 
 		err := inst.Shutdown(timeout * time.Second)
 		if err != nil {
+			op.Done(err)
 			return err
 		}
 	}
 
-	return inst.Start(false)
+	// Setup a new operation for the start phase.
+	op, err = operationlock.Create(d.id, "restart", true, true)
+	if err != nil {
+		return errors.Wrap(err, "Create restart operation")
+	}
+
+	err = inst.Start(false)
+	if err != nil {
+		op.Done(err)
+		return err
+	}
+
+	return nil
 }
 
 // runHooks executes the callback functions returned from a function.
diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index 08b5758128..8ca678e9bf 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -2220,10 +2220,14 @@ func (d *lxc) Start(stateful bool) error {
 	var ctxMap log.Ctx
 
 	// Setup a new operation
-	op, err := operationlock.Create(d.id, "start", false, false)
+	exists, op, err := operationlock.CreateWaitGet(d.id, "start", []string{"restart"}, false, false)
 	if err != nil {
 		return errors.Wrap(err, "Create container start operation")
 	}
+	if exists {
+		// An existing matching operation has now succeeded, return.
+		return nil
+	}
 	defer op.Done(nil)
 
 	if !daemon.SharedMountsSetup {
@@ -2248,7 +2252,9 @@ func (d *lxc) Start(stateful bool) error {
 		"used":      d.lastUsedDate,
 		"stateful":  stateful}
 
-	logger.Info("Starting container", ctxMap)
+	if op.Action() == "start" {
+		logger.Info("Starting container", ctxMap)
+	}
 
 	// If stateful, restore now
 	if stateful {
@@ -2293,7 +2299,11 @@ func (d *lxc) Start(stateful bool) error {
 			return err
 		}
 
-		logger.Info("Started container", ctxMap)
+		if op.Action() == "start" {
+			logger.Info("Started container", ctxMap)
+			d.state.Events.SendLifecycle(d.project, "container-started",
+				fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+		}
 		return nil
 	} else if d.stateful {
 		/* stateless start required when we have state, let's delete it */
@@ -2365,9 +2375,11 @@ func (d *lxc) Start(stateful bool) error {
 		return err
 	}
 
-	logger.Info("Started container", ctxMap)
-	d.state.Events.SendLifecycle(d.project, "container-started",
-		fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+	if op.Action() == "start" {
+		logger.Info("Started container", ctxMap)
+		d.state.Events.SendLifecycle(d.project, "container-started",
+			fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+	}
 
 	return nil
 }
@@ -2462,16 +2474,22 @@ func (d *lxc) onStart(_ map[string]string) error {
 func (d *lxc) Stop(stateful bool) error {
 	var ctxMap log.Ctx
 
-	// Check that we're not already stopped
-	if !d.IsRunning() {
-		return fmt.Errorf("The container is already stopped")
-	}
-
 	// Setup a new operation
-	op, err := operationlock.Create(d.id, "stop", false, true)
+	exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, false, true)
 	if err != nil {
 		return err
 	}
+	if exists {
+		// An existing matching operation has now succeeded, return.
+		return nil
+	}
+
+	// Check that we're not already stopped
+	if !d.IsRunning() {
+		err = fmt.Errorf("The container is already stopped")
+		op.Done(err)
+		return err
+	}
 
 	ctxMap = log.Ctx{
 		"project":   d.project,
@@ -2482,7 +2500,9 @@ func (d *lxc) Stop(stateful bool) error {
 		"used":      d.lastUsedDate,
 		"stateful":  stateful}
 
-	logger.Info("Stopping container", ctxMap)
+	if op.Action() == "stop" {
+		logger.Info("Stopping container", ctxMap)
+	}
 
 	// Handle stateful stop
 	if stateful {
@@ -2586,9 +2606,11 @@ func (d *lxc) Stop(stateful bool) error {
 		return err
 	}
 
-	logger.Info("Stopped container", ctxMap)
-	d.state.Events.SendLifecycle(d.project, "container-stopped",
-		fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+	if op.Action() == "stop" {
+		logger.Info("Stopped container", ctxMap)
+		d.state.Events.SendLifecycle(d.project, "container-stopped",
+			fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+	}
 
 	return nil
 }
@@ -2597,16 +2619,22 @@ func (d *lxc) Stop(stateful bool) error {
 func (d *lxc) Shutdown(timeout time.Duration) error {
 	var ctxMap log.Ctx
 
-	// Check that we're not already stopped
-	if !d.IsRunning() {
-		return fmt.Errorf("The container is already stopped")
-	}
-
 	// Setup a new operation
-	op, err := operationlock.Create(d.id, "stop", true, true)
+	exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, true, false)
 	if err != nil {
 		return err
 	}
+	if exists {
+		// An existing matching operation has now succeeded, return.
+		return nil
+	}
+
+	// Check that we're not already stopped
+	if !d.IsRunning() {
+		err = fmt.Errorf("The container is already stopped")
+		op.Done(err)
+		return err
+	}
 
 	ctxMap = log.Ctx{
 		"project":   d.project,
@@ -2617,7 +2645,9 @@ func (d *lxc) Shutdown(timeout time.Duration) error {
 		"used":      d.lastUsedDate,
 		"timeout":   timeout}
 
-	logger.Info("Shutting down container", ctxMap)
+	if op.Action() == "stop" {
+		logger.Info("Shutting down container", ctxMap)
+	}
 
 	// Load the go-lxc struct
 	if d.expandedConfig["raw.lxc"] != "" {
@@ -2645,16 +2675,38 @@ func (d *lxc) Shutdown(timeout time.Duration) error {
 		return err
 	}
 
-	logger.Info("Shut down container", ctxMap)
-	d.state.Events.SendLifecycle(d.project, "container-shutdown",
-		fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+	if op.Action() == "stop" {
+		logger.Info("Shut down container", ctxMap)
+		d.state.Events.SendLifecycle(d.project, "container-shutdown",
+			fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+	}
 
 	return nil
 }
 
 // Restart restart the instance.
 func (d *lxc) Restart(timeout time.Duration) error {
-	return d.common.restart(d, timeout)
+	ctxMap := log.Ctx{
+		"project":   d.project,
+		"name":      d.name,
+		"action":    "shutdown",
+		"created":   d.creationDate,
+		"ephemeral": d.ephemeral,
+		"used":      d.lastUsedDate,
+		"timeout":   timeout}
+
+	logger.Info("Restarting container", ctxMap)
+
+	err := d.common.restart(d, timeout)
+	if err != nil {
+		return err
+	}
+
+	logger.Info("Restarted container", ctxMap)
+	d.state.Events.SendLifecycle(d.project, "container-restarted",
+		fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+
+	return nil
 }
 
 // onStopNS is triggered by LXC's stop hook once a container is shutdown but before the container's
@@ -2678,6 +2730,7 @@ func (d *lxc) onStopNS(args map[string]string) error {
 // onStop is triggered by LXC's post-stop hook once a container is shutdown and after the
 // container's namespaces have been closed.
 func (d *lxc) onStop(args map[string]string) error {
+	var err error
 	target := args["target"]
 
 	// Validate target
@@ -2688,10 +2741,17 @@ func (d *lxc) onStop(args map[string]string) error {
 
 	// Pick up the existing stop operation lock created in Stop() function.
 	op := operationlock.Get(d.id)
-	if op != nil && op.Action() != "stop" {
+	if op != nil && !shared.StringInSlice(op.Action(), []string{"stop", "restart"}) {
 		return fmt.Errorf("Container is already running a %s operation", op.Action())
 	}
 
+	if op == nil && target == "reboot" {
+		op, err = operationlock.Create(d.id, "restart", false, false)
+		if err != nil {
+			return errors.Wrap(err, "Create restart operation")
+		}
+	}
+
 	// Make sure we can't call go-lxc functions by mistake
 	d.fromHook = true
 
@@ -2710,7 +2770,7 @@ func (d *lxc) onStop(args map[string]string) error {
 	}
 
 	// Record power state
-	err := d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED")
+	err = d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED")
 	if err != nil {
 		err = errors.Wrap(err, "Failed to set container state")
 		op.Done(err)
diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index 5bddefb8ba..54bad6669f 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -487,18 +487,21 @@ func (d *qemu) Freeze() error {
 
 // onStop is run when the instance stops.
 func (d *qemu) onStop(target string) error {
-	ctxMap := log.Ctx{
-		"project":   d.project,
-		"name":      d.name,
-		"ephemeral": d.ephemeral,
-	}
+	var err error
 
 	// Pick up the existing stop operation lock created in Stop() function.
 	op := operationlock.Get(d.id)
-	if op != nil && op.Action() != "stop" {
+	if op != nil && !shared.StringInSlice(op.Action(), []string{"stop", "restart"}) {
 		return fmt.Errorf("Instance is already running a %s operation", op.Action())
 	}
 
+	if op == nil && target == "reboot" {
+		op, err = operationlock.Create(d.id, "restart", false, false)
+		if err != nil {
+			return errors.Wrap(err, "Create restart operation")
+		}
+	}
+
 	// Cleanup.
 	d.cleanupDevices()
 	os.Remove(d.pidFilePath())
@@ -516,49 +519,61 @@ func (d *qemu) onStop(target string) error {
 	// Record power state.
 	err = d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED")
 	if err != nil {
-		if op != nil {
-			op.Done(err)
-		}
+		op.Done(err)
 		return err
 	}
 
 	// Unload the apparmor profile
 	err = apparmor.InstanceUnload(d.state, d)
 	if err != nil {
-		ctxMap["err"] = err
-		logger.Error("Failed to unload AppArmor profile", ctxMap)
+		op.Done(err)
+		return err
 	}
 
 	if target == "reboot" {
 		err = d.Start(false)
+		if err != nil {
+			op.Done(err)
+			return err
+		}
+
 		d.state.Events.SendLifecycle(d.project, "virtual-machine-restarted",
 			fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
 	} else if d.ephemeral {
 		// Destroy ephemeral virtual machines
 		err = d.Delete()
-	}
-	if err != nil {
-		return err
+		if err != nil {
+			op.Done(err)
+			return err
+		}
 	}
 
-	if op != nil {
-		op.Done(nil)
+	if target != "reboot" {
+		d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown",
+			fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
 	}
 
+	op.Done(nil)
 	return nil
 }
 
 // Shutdown shuts the instance down.
 func (d *qemu) Shutdown(timeout time.Duration) error {
-	if !d.IsRunning() {
-		return fmt.Errorf("The instance is already stopped")
-	}
-
 	// Setup a new operation
-	op, err := operationlock.Create(d.id, "stop", true, true)
+	exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, true, false)
 	if err != nil {
 		return err
 	}
+	if exists {
+		// An existing matching operation has now succeeded, return.
+		return nil
+	}
+
+	if !d.IsRunning() {
+		err = fmt.Errorf("The instance is already stopped")
+		op.Done(err)
+		return err
+	}
 
 	// Connect to the monitor.
 	monitor, err := qmp.Connect(d.monitorPath(), qemuSerialChardevName, d.getMonitorEventHandler())
@@ -597,8 +612,9 @@ func (d *qemu) Shutdown(timeout time.Duration) error {
 		case <-chDisconnect:
 			break
 		case <-time.After(timeout):
-			op.Done(fmt.Errorf("Instance was not shutdown after timeout"))
-			return fmt.Errorf("Instance was not shutdown after timeout")
+			err = fmt.Errorf("Instance was not shutdown after timeout")
+			op.Done(err)
+			return err
 		}
 	} else {
 		<-chDisconnect // Block until VM is not running if no timeout provided.
@@ -610,14 +626,24 @@ func (d *qemu) Shutdown(timeout time.Duration) error {
 		return err
 	}
 
-	op.Done(nil)
-	d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+	if op.Action() == "stop" {
+		d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+	}
+
 	return nil
 }
 
 // Restart restart the instance.
 func (d *qemu) Restart(timeout time.Duration) error {
-	return d.common.restart(d, timeout)
+	err := d.common.restart(d, timeout)
+	if err != nil {
+		return err
+	}
+
+	d.state.Events.SendLifecycle(d.project, "virtual-machine-restarted",
+		fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+
+	return nil
 }
 
 func (d *qemu) ovmfPath() string {
@@ -630,23 +656,29 @@ func (d *qemu) ovmfPath() string {
 
 // Start starts the instance.
 func (d *qemu) Start(stateful bool) error {
+	// Setup a new operation
+	exists, op, err := operationlock.CreateWaitGet(d.id, "start", []string{"restart"}, false, false)
+	if err != nil {
+		return errors.Wrap(err, "Create instance start operation")
+	}
+	if exists {
+		// An existing matching operation has now succeeded, return.
+		return nil
+	}
+	defer op.Done(nil)
+
 	// Ensure the correct vhost_vsock kernel module is loaded before establishing the vsock.
-	err := util.LoadModule("vhost_vsock")
+	err = util.LoadModule("vhost_vsock")
 	if err != nil {
+		op.Done(err)
 		return err
 	}
 
 	if d.IsRunning() {
+		op.Done(err)
 		return fmt.Errorf("The instance is already running")
 	}
 
-	// Setup a new operation
-	op, err := operationlock.Create(d.id, "start", false, false)
-	if err != nil {
-		return errors.Wrap(err, "Create instance start operation")
-	}
-	defer op.Done(nil)
-
 	revert := revert.New()
 	defer revert.Fail()
 
@@ -659,6 +691,7 @@ func (d *qemu) Start(stateful bool) error {
 		os.Remove(logfile + ".old")
 		err := os.Rename(logfile, logfile+".old")
 		if err != nil {
+			op.Done(err)
 			return err
 		}
 	}
@@ -714,11 +747,13 @@ func (d *qemu) Start(stateful bool) error {
 		// Start the virtiofsd process in non-daemon mode.
 		proc, err := subprocess.NewProcess(cmd, []string{fmt.Sprintf("--socket-path=%s", sockPath), "-o", fmt.Sprintf("source=%s", filepath.Join(d.Path(), "config"))}, "", "")
 		if err != nil {
+			op.Done(err)
 			return err
 		}
 
 		err = proc.Start()
 		if err != nil {
+			op.Done(err)
 			return err
 		}
 
@@ -728,6 +763,7 @@ func (d *qemu) Start(stateful bool) error {
 
 		err = proc.Save(pidPath)
 		if err != nil {
+			op.Done(err)
 			return err
 		}
 
@@ -741,7 +777,9 @@ func (d *qemu) Start(stateful bool) error {
 		}
 
 		if !shared.PathExists(sockPath) {
-			return fmt.Errorf("virtiofsd failed to bind socket within 1s")
+			err = fmt.Errorf("virtiofsd failed to bind socket within 1s")
+			op.Done(err)
+			return err
 		}
 	} else {
 		logger.Warn("Unable to use virtio-fs for config drive, using 9p as a fallback: virtiofsd missing")
@@ -905,6 +943,7 @@ func (d *qemu) Start(stateful bool) error {
 	// Setup background process.
 	p, err := subprocess.NewProcess(d.state.OS.ExecPath, append(forkLimitsCmd, qemuCmd...), d.EarlyLogFilePath(), d.EarlyLogFilePath())
 	if err != nil {
+		op.Done(err)
 		return err
 	}
 
@@ -960,6 +999,7 @@ func (d *qemu) Start(stateful bool) error {
 
 	err = p.StartWithFiles(files)
 	if err != nil {
+		op.Done(err)
 		return err
 	}
 
@@ -974,6 +1014,7 @@ func (d *qemu) Start(stateful bool) error {
 	pid, err := d.pid()
 	if err != nil {
 		logger.Errorf(`Failed to get VM process ID "%d"`, pid)
+		op.Done(err)
 		return err
 	}
 
@@ -1018,7 +1059,9 @@ func (d *qemu) Start(stateful bool) error {
 
 			// Confirm nothing weird is going on.
 			if len(pins) != len(pids) {
-				return fmt.Errorf("QEMU has less vCPUs than configured")
+				err = fmt.Errorf("QEMU has less vCPUs than configured")
+				op.Done(err)
+				return err
 			}
 
 			for i, pid := range pids {
@@ -1068,7 +1111,11 @@ func (d *qemu) Start(stateful bool) error {
 	}
 
 	revert.Success()
-	d.state.Events.SendLifecycle(d.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+
+	if op.Action() == "start" {
+		d.state.Events.SendLifecycle(d.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+	}
+
 	return nil
 }
 
@@ -2462,19 +2509,27 @@ func (d *qemu) pid() (int, error) {
 
 // Stop the VM.
 func (d *qemu) Stop(stateful bool) error {
+	// Setup a new operation.
+	exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, false, true)
+	if err != nil {
+		return err
+	}
+	if exists {
+		// An existing matching operation has now succeeded, return.
+		return nil
+	}
+
 	// Check that we're not already stopped.
 	if !d.IsRunning() {
-		return fmt.Errorf("The instance is already stopped")
+		err = fmt.Errorf("The instance is already stopped")
+		op.Done(err)
+		return err
 	}
 
 	// Check that no stateful stop was requested.
 	if stateful {
-		return fmt.Errorf("Stateful stop isn't supported for VMs at this time")
-	}
-
-	// Setup a new operation.
-	op, err := operationlock.Create(d.id, "stop", false, true)
-	if err != nil {
+		err = fmt.Errorf("Stateful stop isn't supported for VMs at this time")
+		op.Done(err)
 		return err
 	}
 
@@ -2519,7 +2574,10 @@ func (d *qemu) Stop(stateful bool) error {
 		return err
 	}
 
-	d.state.Events.SendLifecycle(d.project, "virtual-machine-stopped", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+	if op.Action() == "stop" {
+		d.state.Events.SendLifecycle(d.project, "virtual-machine-stopped", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+	}
+
 	return nil
 }
 

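With restart tracking wired through both drivers, a restart now surfaces as a
single container-restarted / virtual-machine-restarted lifecycle event instead
of unrelated stopped and started events. A hedged sketch of watching for it
with the LXD Go client (assumes the client API as it exists at this point;
error handling kept minimal):

package main

import (
	"encoding/json"
	"fmt"

	lxd "github.com/lxc/lxd/client"
	"github.com/lxc/lxd/shared/api"
)

func main() {
	// Connect to the local daemon over its unix socket.
	c, err := lxd.ConnectLXDUnix("", nil)
	if err != nil {
		panic(err)
	}

	listener, err := c.GetEvents()
	if err != nil {
		panic(err)
	}

	// Print lifecycle events; "lxc restart foo" should now produce a single
	// "container-restarted" (or "virtual-machine-restarted") entry.
	_, err = listener.AddHandler([]string{"lifecycle"}, func(event api.Event) {
		lc := api.EventLifecycle{}
		if json.Unmarshal(event.Metadata, &lc) != nil {
			return
		}
		fmt.Println(lc.Action, lc.Source)
	})
	if err != nil {
		panic(err)
	}

	listener.Wait()
}
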
From 619f8b327a170eb466ed710b76c3e53ea7b2042a Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 19:51:10 -0500
Subject: [PATCH 6/6] Makefile: Fix golint URL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index abf7d2d46a..592ada97dc 100644
--- a/Makefile
+++ b/Makefile
@@ -137,7 +137,7 @@ endif
 check: default
 	go get -v -x github.com/rogpeppe/godeps
 	go get -v -x github.com/tsenart/deadcode
-	go get -v -x github.com/golang/lint/golint
+	go get -v -x golang.org/x/lint/golint
 	go test -v -tags "$(TAG_SQLITE3)" $(DEBUG) ./...
 	cd test && ./main.sh
 

