[lxc-devel] [lxd/master] Improve instance operations and fully implement restarted event
stgraber on Github
lxc-bot at linuxcontainers.org
Thu Nov 26 00:52:45 UTC 2020
From ab85b3dcc245dfbcfc326bbf88fb9a13061133b1 Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:44:25 -0500
Subject: [PATCH 1/6] lxd/instance/operations: Allow Wait/Done on nil struct
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/instance/operationlock/operationlock.go | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/lxd/instance/operationlock/operationlock.go b/lxd/instance/operationlock/operationlock.go
index 6b4a04c485..c9db4ba432 100644
--- a/lxd/instance/operationlock/operationlock.go
+++ b/lxd/instance/operationlock/operationlock.go
@@ -86,6 +86,10 @@ func (op *InstanceOperation) Reset() error {
// Wait waits for an operation to finish.
func (op *InstanceOperation) Wait() error {
+ if op == nil {
+ return nil
+ }
+
<-op.chanDone
return op.err
@@ -96,6 +100,11 @@ func (op *InstanceOperation) Done(err error) {
instanceOperationsLock.Lock()
defer instanceOperationsLock.Unlock()
+ // This function can be called on a nil struct.
+ if op == nil {
+ return
+ }
+
// Check if already done
runningOp, ok := instanceOperations[op.id]
if !ok || runningOp != op {
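
The point of tolerating a nil receiver is that call sites can then chain
operationlock.Get(id).Wait() without guarding against a missing operation
(patch 3 below does exactly that). A minimal standalone sketch of the Go
nil-receiver pattern, using a simplified stand-in type rather than the real
InstanceOperation:

package main

import "fmt"

// op is a simplified stand-in for operationlock.InstanceOperation.
type op struct {
	chanDone chan struct{}
	err      error
}

// Wait tolerates a nil receiver: Go dispatches pointer-receiver methods
// on nil pointers, so the body can check for nil itself.
func (o *op) Wait() error {
	if o == nil {
		return nil // no operation in flight, nothing to wait for
	}
	<-o.chanDone
	return o.err
}

func main() {
	var o *op             // nil: Get() found no operation
	fmt.Println(o.Wait()) // prints <nil> instead of panicking
}
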
From 5084fb9f6165af18c32777fedd13df01900cd8ef Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:48:12 -0500
Subject: [PATCH 2/6] lxd/instance/lxc: Improve use of operations
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/instance/drivers/driver_lxc.go | 104 ++++++++++++-----------------
1 file changed, 42 insertions(+), 62 deletions(-)
diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index ab7eeba24c..c5ecf02179 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -2227,12 +2227,15 @@ func (d *lxc) Start(stateful bool) error {
defer op.Done(nil)
if !daemon.SharedMountsSetup {
- return fmt.Errorf("Daemon failed to setup shared mounts base: %v. Does security.nesting need to be turned on?", err)
+ err = fmt.Errorf("Daemon failed to setup shared mounts base. Does security.nesting need to be turned on?")
+ op.Done(err)
+ return err
}
// Run the shared start code
configPath, postStartHooks, err := d.startCommon()
if err != nil {
+ op.Done(err)
return errors.Wrap(err, "Failed preparing container for start")
}
@@ -2250,7 +2253,9 @@ func (d *lxc) Start(stateful bool) error {
// If stateful, restore now
if stateful {
if !d.stateful {
- return fmt.Errorf("Container has no existing state to restore")
+ err = fmt.Errorf("Container has no existing state to restore")
+ op.Done(err)
+ return err
}
criuMigrationArgs := instance.CriuMigrationArgs{
@@ -2265,6 +2270,7 @@ func (d *lxc) Start(stateful bool) error {
err := d.Migrate(&criuMigrationArgs)
if err != nil && !d.IsRunning() {
+ op.Done(err)
return errors.Wrap(err, "Migrate")
}
@@ -2273,7 +2279,7 @@ func (d *lxc) Start(stateful bool) error {
err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, false)
if err != nil {
- logger.Error("Failed starting container", ctxMap)
+ op.Done(err)
return errors.Wrap(err, "Start container")
}
@@ -2281,8 +2287,9 @@ func (d *lxc) Start(stateful bool) error {
err = d.runHooks(postStartHooks)
if err != nil {
// Attempt to stop container.
- op.Done(err)
d.Stop(false)
+
+ op.Done(err)
return err
}
@@ -2292,12 +2299,14 @@ func (d *lxc) Start(stateful bool) error {
/* stateless start required when we have state, let's delete it */
err := os.RemoveAll(d.StatePath())
if err != nil {
+ op.Done(err)
return err
}
d.stateful = false
err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, false)
if err != nil {
+ op.Done(err)
return errors.Wrap(err, "Persist stateful flag")
}
}
@@ -2342,6 +2351,7 @@ func (d *lxc) Start(stateful bool) error {
logger.Error("Failed starting container", ctxMap)
// Return the actual error
+ op.Done(err)
return err
}
@@ -2349,8 +2359,9 @@ func (d *lxc) Start(stateful bool) error {
err = d.runHooks(postStartHooks)
if err != nil {
// Attempt to stop container.
- op.Done(err)
d.Stop(false)
+
+ op.Done(err)
return err
}
@@ -2482,7 +2493,6 @@ func (d *lxc) Stop(stateful bool) error {
err := os.MkdirAll(stateDir, 0700)
if err != nil {
op.Done(err)
- logger.Error("Failed stopping container", ctxMap)
return err
}
@@ -2500,28 +2510,25 @@ func (d *lxc) Stop(stateful bool) error {
err = d.Migrate(&criuMigrationArgs)
if err != nil {
op.Done(err)
- logger.Error("Failed stopping container", ctxMap)
return err
}
err = op.Wait()
if err != nil && d.IsRunning() {
- logger.Error("Failed stopping container", ctxMap)
return err
}
d.stateful = true
err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, true)
if err != nil {
- op.Done(err)
logger.Error("Failed stopping container", ctxMap)
return err
}
- op.Done(nil)
logger.Info("Stopped container", ctxMap)
d.state.Events.SendLifecycle(d.project, "container-stopped",
fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+
return nil
} else if shared.PathExists(d.StatePath()) {
os.RemoveAll(d.StatePath())
@@ -2532,14 +2539,12 @@ func (d *lxc) Stop(stateful bool) error {
err = d.initLXC(true)
if err != nil {
op.Done(err)
- logger.Error("Failed stopping container", ctxMap)
return err
}
} else {
err = d.initLXC(false)
if err != nil {
op.Done(err)
- logger.Error("Failed stopping container", ctxMap)
return err
}
}
@@ -2570,15 +2575,14 @@ func (d *lxc) Stop(stateful bool) error {
}
}
- if err := d.c.Stop(); err != nil {
+ err = d.c.Stop()
+ if err != nil {
op.Done(err)
- logger.Error("Failed stopping container", ctxMap)
return err
}
err = op.Wait()
if err != nil && d.IsRunning() {
- logger.Error("Failed stopping container", ctxMap)
return err
}
@@ -2620,27 +2624,24 @@ func (d *lxc) Shutdown(timeout time.Duration) error {
err = d.initLXC(true)
if err != nil {
op.Done(err)
- logger.Error("Failed stopping container", ctxMap)
return err
}
} else {
err = d.initLXC(false)
if err != nil {
op.Done(err)
- logger.Error("Failed stopping container", ctxMap)
return err
}
}
- if err := d.c.Shutdown(timeout); err != nil {
+ err = d.c.Shutdown(timeout)
+ if err != nil {
op.Done(err)
- logger.Error("Failed shutting down container", ctxMap)
return err
}
err = op.Wait()
if err != nil && d.IsRunning() {
- logger.Error("Failed shutting down container", ctxMap)
return err
}
@@ -2705,13 +2706,15 @@ func (d *lxc) onStop(args map[string]string) error {
"stateful": false}
if op == nil {
- logger.Info(fmt.Sprintf("Container initiated %s", target), ctxMap)
+ logger.Debug(fmt.Sprintf("Container initiated %s", target), ctxMap)
}
// Record power state
err := d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED")
if err != nil {
- logger.Error("Failed to set container state", log.Ctx{"container": d.Name(), "err": err})
+ err = errors.Wrap(err, "Failed to set container state")
+ op.Done(err)
+ return err
}
go func(d *lxc, target string, op *operationlock.InstanceOperation) {
@@ -2719,13 +2722,11 @@ func (d *lxc) onStop(args map[string]string) error {
err = nil
// Unlock on return
- if op != nil {
- defer op.Done(err)
- }
+ defer op.Done(nil)
// Wait for other post-stop actions to be done and the container actually stopping.
d.IsRunning()
- logger.Debug("Container stopped, starting storage cleanup", log.Ctx{"container": d.Name()})
+ logger.Debug("Container stopped, cleaning up", log.Ctx{"container": d.Name()})
// Clean up devices.
d.cleanupDevices(false, "")
@@ -2733,56 +2734,42 @@ func (d *lxc) onStop(args map[string]string) error {
// Remove directory ownership (to avoid issue if uidmap is re-used)
err := os.Chown(d.Path(), 0, 0)
if err != nil {
- if op != nil {
- op.Done(err)
- }
-
- logger.Error("Failed clearing ownernship", log.Ctx{"container": d.Name(), "err": err, "path": d.Path()})
+ op.Done(errors.Wrap(err, "Failed clearing ownership"))
+ return
}
err = os.Chmod(d.Path(), 0100)
if err != nil {
- if op != nil {
- op.Done(err)
- }
-
- logger.Error("Failed clearing permissions", log.Ctx{"container": d.Name(), "err": err, "path": d.Path()})
+ op.Done(errors.Wrap(err, "Failed clearing permissions"))
+ return
}
// Stop the storage for this container
_, err = d.unmount()
if err != nil {
- if op != nil {
- op.Done(err)
- }
-
- logger.Error("Failed unnounting container", log.Ctx{"container": d.Name(), "err": err})
+ op.Done(errors.Wrap(err, "Failed unmounting container"))
+ return
}
// Unload the apparmor profile
err = apparmor.InstanceUnload(d.state, d)
if err != nil {
- logger.Error("Failed to destroy apparmor namespace", log.Ctx{"container": d.Name(), "err": err})
+ op.Done(errors.Wrap(err, "Failed to destroy apparmor namespace"))
+ return
}
// Clean all the unix devices
err = d.removeUnixDevices()
if err != nil {
- if op != nil {
- op.Done(err)
- }
-
- logger.Error("Unable to remove unix devices", log.Ctx{"container": d.Name(), "err": err})
+ op.Done(errors.Wrap(err, "Failed to remove unix devices"))
+ return
}
// Clean all the disk devices
err = d.removeDiskDevices()
if err != nil {
- if op != nil {
- op.Done(err)
- }
-
- logger.Error("Unable to remove disk devices", log.Ctx{"container": d.Name(), "err": err})
+ op.Done(errors.Wrap(err, "Failed to remove disk devices"))
+ return
}
// Log and emit lifecycle if not user triggered
@@ -2796,11 +2783,7 @@ func (d *lxc) onStop(args map[string]string) error {
// Start the container again
err = d.Start(false)
if err != nil {
- if op != nil {
- op.Done(err)
- }
-
- logger.Error("Failed restarting container", log.Ctx{"container": d.Name(), "err": err})
+ op.Done(errors.Wrap(err, "Failed restarting container"))
return
}
@@ -2815,11 +2798,8 @@ func (d *lxc) onStop(args map[string]string) error {
if d.ephemeral {
err = d.Delete()
if err != nil {
- if op != nil {
- op.Done(err)
- }
-
- logger.Error("Failed deleting ephemeral", log.Ctx{"container": d.Name(), "err": err})
+ op.Done(errors.Wrap(err, "Failed deleting ephemeral container"))
+ return
}
}
}(d, target, op)
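
The recurring shape in this patch: defer op.Done(nil) marks success, and an
explicit op.Done(err) runs before every error return. This relies on the
first Done call winning; the "Check if already done" guard in operationlock
discards later calls. A self-contained sketch of that contract, with
instanceOp as a toy stand-in for the real type:

package main

import (
	"errors"
	"fmt"
	"sync"
)

// instanceOp is a toy model of operationlock.InstanceOperation.
type instanceOp struct {
	once     sync.Once
	err      error
	chanDone chan struct{}
}

// Done records only the first result; later calls are no-ops, mirroring
// the "Check if already done" guard in the real package.
func (o *instanceOp) Done(err error) {
	o.once.Do(func() {
		o.err = err
		close(o.chanDone)
	})
}

func start(o *instanceOp, work func() error) error {
	defer o.Done(nil) // success marker; a no-op if Done(err) already ran
	if err := work(); err != nil {
		o.Done(err) // waiters must observe the real failure, not nil
		return err
	}
	return nil
}

func main() {
	o := &instanceOp{chanDone: make(chan struct{})}
	start(o, func() error { return errors.New("boom") })
	<-o.chanDone
	fmt.Println(o.err) // boom
}
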
From 80763eba205580b8dc9dc989611121227f72926f Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:48:44 -0500
Subject: [PATCH 3/6] lxd/instance/lxc: Improve locking on file ops
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/instance/drivers/driver_lxc.go | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index c5ecf02179..08b5758128 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -4980,6 +4980,9 @@ func (d *lxc) inheritInitPidFd() (int, *os.File) {
// FileExists returns whether file exists inside instance.
func (d *lxc) FileExists(path string) error {
+ // Check for ongoing operations (that may involve shifting).
+ operationlock.Get(d.id).Wait()
+
// Setup container storage if needed
_, err := d.mount()
if err != nil {
@@ -5026,10 +5029,7 @@ func (d *lxc) FileExists(path string) error {
// FilePull gets a file from the instance.
func (d *lxc) FilePull(srcpath string, dstpath string) (int64, int64, os.FileMode, string, []string, error) {
// Check for ongoing operations (that may involve shifting).
- op := operationlock.Get(d.id)
- if op != nil {
- op.Wait()
- }
+ operationlock.Get(d.id).Wait()
// Setup container storage if needed
_, err := d.mount()
@@ -5152,10 +5152,7 @@ func (d *lxc) FilePull(srcpath string, dstpath string) (int64, int64, os.FileMod
// FilePush sends a file into the instance.
func (d *lxc) FilePush(fileType string, srcpath string, dstpath string, uid int64, gid int64, mode int, write string) error {
// Check for ongoing operations (that may involve shifting).
- op := operationlock.Get(d.id)
- if op != nil {
- op.Wait()
- }
+ operationlock.Get(d.id).Wait()
var rootUID int64
var rootGID int64
@@ -5244,6 +5241,9 @@ func (d *lxc) FilePush(fileType string, srcpath string, dstpath string, uid int6
// FileRemove removes a file inside the instance.
func (d *lxc) FileRemove(path string) error {
+ // Check for ongoing operations (that may involve shifting).
+ operationlock.Get(d.id).Wait()
+
var errStr string
// Setup container storage if needed
From 9a6f8f6d6f772149e1a72fb3a790065eab2fb047 Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:50:25 -0500
Subject: [PATCH 4/6] lxd/instance/operations: Introduce CreateWaitGet
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/instance/operationlock/operationlock.go | 39 +++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/lxd/instance/operationlock/operationlock.go b/lxd/instance/operationlock/operationlock.go
index c9db4ba432..e55f04dffc 100644
--- a/lxd/instance/operationlock/operationlock.go
+++ b/lxd/instance/operationlock/operationlock.go
@@ -4,6 +4,8 @@ import (
"fmt"
"sync"
"time"
+
+ "github.com/lxc/lxd/shared"
)
var instanceOperationsLock sync.Mutex
@@ -66,6 +68,43 @@ func Create(instanceID int, action string, reusable bool, reuse bool) (*Instance
return op, nil
}
+// CreateWaitGet is a weird function which does what we happen to want most of the time.
+//
+// If the instance has an operation of the same type and it's not reusable
+// or the caller doesn't want to reuse it, the function will wait and
+// indicate that it did so.
+//
+// If the instance has an operation of one of the alternate types, then
+// the operation is returned to the user.
+//
+// If the instance doesn't have an operation, has an operation of a different
+// type that is not in the alternate list or has the right type and is
+// being reused, then this behaves as a Create call.
+func CreateWaitGet(instanceID int, action string, altActions []string, reusable bool, reuse bool) (bool, *InstanceOperation, error) {
+ op := Get(instanceID)
+
+ // No existing operation, call create.
+ if op == nil {
+ op, err := Create(instanceID, action, reusable, reuse)
+ return false, op, err
+ }
+
+	// Operation matches but either isn't reusable or reuse wasn't requested, wait.
+ if op.action == action && (!reuse || !op.reusable) {
+ err := op.Wait()
+ return true, nil, err
+ }
+
+	// Operation matches one of the alternate actions, return the operation.
+ if shared.StringInSlice(op.action, altActions) {
+ return false, op, nil
+ }
+
+ // Send the rest to Create
+ op, err := Create(instanceID, action, reusable, reuse)
+ return false, op, err
+}
+
// Get retrieves an existing lock or returns nil if no lock exists.
func Get(instanceID int) *InstanceOperation {
instanceOperationsLock.Lock()
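
To make the calling convention concrete, here is a sketch of how a driver
method would consume CreateWaitGet, mirroring the Start() call sites in
patch 5 (startSketch itself is hypothetical, not LXD code):

package drivers

import "github.com/lxc/lxd/lxd/instance/operationlock"

// startSketch is illustrative glue around CreateWaitGet.
func startSketch(instanceID int) error {
	// "start" may ride along with an in-flight "restart" (the altActions list).
	exists, op, err := operationlock.CreateWaitGet(instanceID, "start", []string{"restart"}, false, false)
	if err != nil {
		return err
	}
	if exists {
		// A matching operation completed while we waited; nothing left to do.
		return nil
	}
	defer op.Done(nil)

	// ... the actual start logic, calling op.Done(err) before error returns ...
	return nil
}
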
From c001bbb6e71d61f802599448edec71214e6d258d Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 18:50:49 -0500
Subject: [PATCH 5/6] lxd/instance: Introduce restart tracking
Closes #8010
Closes #8090
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
Signed-off-by: Daniel Segal <dlemel8 at gmail.com>
---
lxd/instance/drivers/driver_common.go | 27 ++++-
lxd/instance/drivers/driver_lxc.go | 118 +++++++++++++++-----
lxd/instance/drivers/driver_qemu.go | 148 ++++++++++++++++++--------
3 files changed, 217 insertions(+), 76 deletions(-)
diff --git a/lxd/instance/drivers/driver_common.go b/lxd/instance/drivers/driver_common.go
index 395a200cf7..ecb5d9bf55 100644
--- a/lxd/instance/drivers/driver_common.go
+++ b/lxd/instance/drivers/driver_common.go
@@ -14,6 +14,7 @@ import (
"github.com/lxc/lxd/lxd/device/nictype"
"github.com/lxc/lxd/lxd/instance"
"github.com/lxc/lxd/lxd/instance/instancetype"
+ "github.com/lxc/lxd/lxd/instance/operationlock"
"github.com/lxc/lxd/lxd/operations"
"github.com/lxc/lxd/lxd/project"
"github.com/lxc/lxd/lxd/state"
@@ -430,23 +431,45 @@ func (d *common) expandDevices(profiles []api.Profile) error {
// restart handles instance restarts.
func (d *common) restart(inst instance.Instance, timeout time.Duration) error {
+ // Setup a new operation for the stop/shutdown phase.
+ op, err := operationlock.Create(d.id, "restart", true, true)
+ if err != nil {
+ return errors.Wrap(err, "Create restart operation")
+ }
+
if timeout == 0 {
err := inst.Stop(false)
if err != nil {
+ op.Done(err)
return err
}
} else {
if inst.IsFrozen() {
- return errors.New("Instance is not running")
+ err = fmt.Errorf("Instance is not running")
+ op.Done(err)
+ return err
}
err := inst.Shutdown(timeout * time.Second)
if err != nil {
+ op.Done(err)
return err
}
}
- return inst.Start(false)
+ // Setup a new operation for the start phase.
+ op, err = operationlock.Create(d.id, "restart", true, true)
+ if err != nil {
+ return errors.Wrap(err, "Create restart operation")
+ }
+
+ err = inst.Start(false)
+ if err != nil {
+ op.Done(err)
+ return err
+ }
+
+ return nil
}
// runHooks executes the callback functions returned from a function.
diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index 08b5758128..8ca678e9bf 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -2220,10 +2220,14 @@ func (d *lxc) Start(stateful bool) error {
var ctxMap log.Ctx
// Setup a new operation
- op, err := operationlock.Create(d.id, "start", false, false)
+ exists, op, err := operationlock.CreateWaitGet(d.id, "start", []string{"restart"}, false, false)
if err != nil {
return errors.Wrap(err, "Create container start operation")
}
+ if exists {
+ // An existing matching operation has now succeeded, return.
+ return nil
+ }
defer op.Done(nil)
if !daemon.SharedMountsSetup {
@@ -2248,7 +2252,9 @@ func (d *lxc) Start(stateful bool) error {
"used": d.lastUsedDate,
"stateful": stateful}
- logger.Info("Starting container", ctxMap)
+ if op.Action() == "start" {
+ logger.Info("Starting container", ctxMap)
+ }
// If stateful, restore now
if stateful {
@@ -2293,7 +2299,11 @@ func (d *lxc) Start(stateful bool) error {
return err
}
- logger.Info("Started container", ctxMap)
+ if op.Action() == "start" {
+ logger.Info("Started container", ctxMap)
+ d.state.Events.SendLifecycle(d.project, "container-started",
+ fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+ }
return nil
} else if d.stateful {
/* stateless start required when we have state, let's delete it */
@@ -2365,9 +2375,11 @@ func (d *lxc) Start(stateful bool) error {
return err
}
- logger.Info("Started container", ctxMap)
- d.state.Events.SendLifecycle(d.project, "container-started",
- fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+ if op.Action() == "start" {
+ logger.Info("Started container", ctxMap)
+ d.state.Events.SendLifecycle(d.project, "container-started",
+ fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+ }
return nil
}
@@ -2462,16 +2474,22 @@ func (d *lxc) onStart(_ map[string]string) error {
func (d *lxc) Stop(stateful bool) error {
var ctxMap log.Ctx
- // Check that we're not already stopped
- if !d.IsRunning() {
- return fmt.Errorf("The container is already stopped")
- }
-
// Setup a new operation
- op, err := operationlock.Create(d.id, "stop", false, true)
+ exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, false, true)
if err != nil {
return err
}
+ if exists {
+ // An existing matching operation has now succeeded, return.
+ return nil
+ }
+
+ // Check that we're not already stopped
+ if !d.IsRunning() {
+ err = fmt.Errorf("The container is already stopped")
+ op.Done(err)
+ return err
+ }
ctxMap = log.Ctx{
"project": d.project,
@@ -2482,7 +2500,9 @@ func (d *lxc) Stop(stateful bool) error {
"used": d.lastUsedDate,
"stateful": stateful}
- logger.Info("Stopping container", ctxMap)
+ if op.Action() == "stop" {
+ logger.Info("Stopping container", ctxMap)
+ }
// Handle stateful stop
if stateful {
@@ -2586,9 +2606,11 @@ func (d *lxc) Stop(stateful bool) error {
return err
}
- logger.Info("Stopped container", ctxMap)
- d.state.Events.SendLifecycle(d.project, "container-stopped",
- fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+ if op.Action() == "stop" {
+ logger.Info("Stopped container", ctxMap)
+ d.state.Events.SendLifecycle(d.project, "container-stopped",
+ fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+ }
return nil
}
@@ -2597,16 +2619,22 @@ func (d *lxc) Stop(stateful bool) error {
func (d *lxc) Shutdown(timeout time.Duration) error {
var ctxMap log.Ctx
- // Check that we're not already stopped
- if !d.IsRunning() {
- return fmt.Errorf("The container is already stopped")
- }
-
// Setup a new operation
- op, err := operationlock.Create(d.id, "stop", true, true)
+ exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, true, false)
if err != nil {
return err
}
+ if exists {
+ // An existing matching operation has now succeeded, return.
+ return nil
+ }
+
+ // Check that we're not already stopped
+ if !d.IsRunning() {
+ err = fmt.Errorf("The container is already stopped")
+ op.Done(err)
+ return err
+ }
ctxMap = log.Ctx{
"project": d.project,
@@ -2617,7 +2645,9 @@ func (d *lxc) Shutdown(timeout time.Duration) error {
"used": d.lastUsedDate,
"timeout": timeout}
- logger.Info("Shutting down container", ctxMap)
+ if op.Action() == "stop" {
+ logger.Info("Shutting down container", ctxMap)
+ }
// Load the go-lxc struct
if d.expandedConfig["raw.lxc"] != "" {
@@ -2645,16 +2675,38 @@ func (d *lxc) Shutdown(timeout time.Duration) error {
return err
}
- logger.Info("Shut down container", ctxMap)
- d.state.Events.SendLifecycle(d.project, "container-shutdown",
- fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+ if op.Action() == "stop" {
+ logger.Info("Shut down container", ctxMap)
+ d.state.Events.SendLifecycle(d.project, "container-shutdown",
+ fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+ }
return nil
}
// Restart restarts the instance.
func (d *lxc) Restart(timeout time.Duration) error {
- return d.common.restart(d, timeout)
+ ctxMap := log.Ctx{
+ "project": d.project,
+ "name": d.name,
+ "action": "shutdown",
+ "created": d.creationDate,
+ "ephemeral": d.ephemeral,
+ "used": d.lastUsedDate,
+ "timeout": timeout}
+
+ logger.Info("Restarting container", ctxMap)
+
+ err := d.common.restart(d, timeout)
+ if err != nil {
+ return err
+ }
+
+ logger.Info("Restarted container", ctxMap)
+ d.state.Events.SendLifecycle(d.project, "container-restarted",
+ fmt.Sprintf("/1.0/containers/%s", d.name), nil)
+
+ return nil
}
// onStopNS is triggered by LXC's stop hook once a container is shutdown but before the container's
@@ -2678,6 +2730,7 @@ func (d *lxc) onStopNS(args map[string]string) error {
// onStop is triggered by LXC's post-stop hook once a container is shutdown and after the
// container's namespaces have been closed.
func (d *lxc) onStop(args map[string]string) error {
+ var err error
target := args["target"]
// Validate target
@@ -2688,10 +2741,17 @@ func (d *lxc) onStop(args map[string]string) error {
// Pick up the existing stop operation lock created in Stop() function.
op := operationlock.Get(d.id)
- if op != nil && op.Action() != "stop" {
+ if op != nil && !shared.StringInSlice(op.Action(), []string{"stop", "restart"}) {
return fmt.Errorf("Container is already running a %s operation", op.Action())
}
+ if op == nil && target == "reboot" {
+ op, err = operationlock.Create(d.id, "restart", false, false)
+ if err != nil {
+ return errors.Wrap(err, "Create restart operation")
+ }
+ }
+
// Make sure we can't call go-lxc functions by mistake
d.fromHook = true
@@ -2710,7 +2770,7 @@ func (d *lxc) onStop(args map[string]string) error {
}
// Record power state
- err := d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED")
+ err = d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED")
if err != nil {
err = errors.Wrap(err, "Failed to set container state")
op.Done(err)
diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index 5bddefb8ba..54bad6669f 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -487,18 +487,21 @@ func (d *qemu) Freeze() error {
// onStop is run when the instance stops.
func (d *qemu) onStop(target string) error {
- ctxMap := log.Ctx{
- "project": d.project,
- "name": d.name,
- "ephemeral": d.ephemeral,
- }
+ var err error
// Pick up the existing stop operation lock created in Stop() function.
op := operationlock.Get(d.id)
- if op != nil && op.Action() != "stop" {
+ if op != nil && !shared.StringInSlice(op.Action(), []string{"stop", "restart"}) {
return fmt.Errorf("Instance is already running a %s operation", op.Action())
}
+ if op == nil && target == "reboot" {
+ op, err = operationlock.Create(d.id, "restart", false, false)
+ if err != nil {
+ return errors.Wrap(err, "Create restart operation")
+ }
+ }
+
// Cleanup.
d.cleanupDevices()
os.Remove(d.pidFilePath())
@@ -516,49 +519,61 @@ func (d *qemu) onStop(target string) error {
// Record power state.
err = d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED")
if err != nil {
- if op != nil {
- op.Done(err)
- }
+ op.Done(err)
return err
}
// Unload the apparmor profile
err = apparmor.InstanceUnload(d.state, d)
if err != nil {
- ctxMap["err"] = err
- logger.Error("Failed to unload AppArmor profile", ctxMap)
+ op.Done(err)
+ return err
}
if target == "reboot" {
err = d.Start(false)
+ if err != nil {
+ op.Done(err)
+ return err
+ }
+
d.state.Events.SendLifecycle(d.project, "virtual-machine-restarted",
fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
} else if d.ephemeral {
// Destroy ephemeral virtual machines
err = d.Delete()
- }
- if err != nil {
- return err
+ if err != nil {
+ op.Done(err)
+ return err
+ }
}
- if op != nil {
- op.Done(nil)
+ if target != "reboot" {
+ d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown",
+ fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
}
+ op.Done(nil)
return nil
}
// Shutdown shuts the instance down.
func (d *qemu) Shutdown(timeout time.Duration) error {
- if !d.IsRunning() {
- return fmt.Errorf("The instance is already stopped")
- }
-
// Setup a new operation
- op, err := operationlock.Create(d.id, "stop", true, true)
+ exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, true, false)
if err != nil {
return err
}
+ if exists {
+ // An existing matching operation has now succeeded, return.
+ return nil
+ }
+
+ if !d.IsRunning() {
+ err = fmt.Errorf("The instance is already stopped")
+ op.Done(err)
+ return err
+ }
// Connect to the monitor.
monitor, err := qmp.Connect(d.monitorPath(), qemuSerialChardevName, d.getMonitorEventHandler())
@@ -597,8 +612,9 @@ func (d *qemu) Shutdown(timeout time.Duration) error {
case <-chDisconnect:
break
case <-time.After(timeout):
- op.Done(fmt.Errorf("Instance was not shutdown after timeout"))
- return fmt.Errorf("Instance was not shutdown after timeout")
+ err = fmt.Errorf("Instance was not shutdown after timeout")
+ op.Done(err)
+ return err
}
} else {
<-chDisconnect // Block until VM is not running if no timeout provided.
@@ -610,14 +626,24 @@ func (d *qemu) Shutdown(timeout time.Duration) error {
return err
}
- op.Done(nil)
- d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+ if op.Action() == "stop" {
+ d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+ }
+
return nil
}
// Restart restarts the instance.
func (d *qemu) Restart(timeout time.Duration) error {
- return d.common.restart(d, timeout)
+ err := d.common.restart(d, timeout)
+ if err != nil {
+ return err
+ }
+
+ d.state.Events.SendLifecycle(d.project, "virtual-machine-restarted",
+ fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+
+ return nil
}
func (d *qemu) ovmfPath() string {
@@ -630,23 +656,29 @@ func (d *qemu) ovmfPath() string {
// Start starts the instance.
func (d *qemu) Start(stateful bool) error {
+ // Setup a new operation
+ exists, op, err := operationlock.CreateWaitGet(d.id, "start", []string{"restart"}, false, false)
+ if err != nil {
+ return errors.Wrap(err, "Create instance start operation")
+ }
+ if exists {
+ // An existing matching operation has now succeeded, return.
+ return nil
+ }
+ defer op.Done(nil)
+
// Ensure the correct vhost_vsock kernel module is loaded before establishing the vsock.
- err := util.LoadModule("vhost_vsock")
+ err = util.LoadModule("vhost_vsock")
if err != nil {
+ op.Done(err)
return err
}
if d.IsRunning() {
-		return fmt.Errorf("The instance is already running")
+		err = fmt.Errorf("The instance is already running")
+		op.Done(err)
+		return err
}
- // Setup a new operation
- op, err := operationlock.Create(d.id, "start", false, false)
- if err != nil {
- return errors.Wrap(err, "Create instance start operation")
- }
- defer op.Done(nil)
-
revert := revert.New()
defer revert.Fail()
@@ -659,6 +691,7 @@ func (d *qemu) Start(stateful bool) error {
os.Remove(logfile + ".old")
err := os.Rename(logfile, logfile+".old")
if err != nil {
+ op.Done(err)
return err
}
}
@@ -714,11 +747,13 @@ func (d *qemu) Start(stateful bool) error {
// Start the virtiofsd process in non-daemon mode.
proc, err := subprocess.NewProcess(cmd, []string{fmt.Sprintf("--socket-path=%s", sockPath), "-o", fmt.Sprintf("source=%s", filepath.Join(d.Path(), "config"))}, "", "")
if err != nil {
+ op.Done(err)
return err
}
err = proc.Start()
if err != nil {
+ op.Done(err)
return err
}
@@ -728,6 +763,7 @@ func (d *qemu) Start(stateful bool) error {
err = proc.Save(pidPath)
if err != nil {
+ op.Done(err)
return err
}
@@ -741,7 +777,9 @@ func (d *qemu) Start(stateful bool) error {
}
if !shared.PathExists(sockPath) {
- return fmt.Errorf("virtiofsd failed to bind socket within 1s")
+ err = fmt.Errorf("virtiofsd failed to bind socket within 1s")
+ op.Done(err)
+ return err
}
} else {
logger.Warn("Unable to use virtio-fs for config drive, using 9p as a fallback: virtiofsd missing")
@@ -905,6 +943,7 @@ func (d *qemu) Start(stateful bool) error {
// Setup background process.
p, err := subprocess.NewProcess(d.state.OS.ExecPath, append(forkLimitsCmd, qemuCmd...), d.EarlyLogFilePath(), d.EarlyLogFilePath())
if err != nil {
+ op.Done(err)
return err
}
@@ -960,6 +999,7 @@ func (d *qemu) Start(stateful bool) error {
err = p.StartWithFiles(files)
if err != nil {
+ op.Done(err)
return err
}
@@ -974,6 +1014,7 @@ func (d *qemu) Start(stateful bool) error {
pid, err := d.pid()
if err != nil {
logger.Errorf(`Failed to get VM process ID "%d"`, pid)
+ op.Done(err)
return err
}
@@ -1018,7 +1059,9 @@ func (d *qemu) Start(stateful bool) error {
// Confirm nothing weird is going on.
if len(pins) != len(pids) {
- return fmt.Errorf("QEMU has less vCPUs than configured")
+ err = fmt.Errorf("QEMU has less vCPUs than configured")
+ op.Done(err)
+ return err
}
for i, pid := range pids {
@@ -1068,7 +1111,11 @@ func (d *qemu) Start(stateful bool) error {
}
revert.Success()
- d.state.Events.SendLifecycle(d.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+
+ if op.Action() == "start" {
+ d.state.Events.SendLifecycle(d.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+ }
+
return nil
}
@@ -2462,19 +2509,27 @@ func (d *qemu) pid() (int, error) {
// Stop the VM.
func (d *qemu) Stop(stateful bool) error {
+ // Setup a new operation.
+ exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, false, true)
+ if err != nil {
+ return err
+ }
+ if exists {
+ // An existing matching operation has now succeeded, return.
+ return nil
+ }
+
// Check that we're not already stopped.
if !d.IsRunning() {
- return fmt.Errorf("The instance is already stopped")
+ err = fmt.Errorf("The instance is already stopped")
+ op.Done(err)
+ return err
}
// Check that no stateful stop was requested.
if stateful {
- return fmt.Errorf("Stateful stop isn't supported for VMs at this time")
- }
-
- // Setup a new operation.
- op, err := operationlock.Create(d.id, "stop", false, true)
- if err != nil {
+ err = fmt.Errorf("Stateful stop isn't supported for VMs at this time")
+ op.Done(err)
return err
}
@@ -2519,7 +2574,10 @@ func (d *qemu) Stop(stateful bool) error {
return err
}
- d.state.Events.SendLifecycle(d.project, "virtual-machine-stopped", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+ if op.Action() == "stop" {
+ d.state.Events.SendLifecycle(d.project, "virtual-machine-stopped", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil)
+ }
+
return nil
}
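
Pulling the pieces together: restart() takes the reusable "restart" lock
once per phase, and Stop()/Start() join it through CreateWaitGet's
altActions list, which is also what suppresses their individual lifecycle
events so only the restarted event fires. A condensed sketch of the
two-phase flow, with lock/stop/start as stand-in callbacks rather than LXD
APIs:

package main

import "fmt"

// restartSketch condenses driver_common.go's restart(): a reusable
// "restart" operation covers the stop phase, then a fresh one covers the
// start phase, so onStop and Start always find a live lock to join.
// (In LXD the stop-phase lock is completed by the onStop hook, not here.)
func restartSketch(stop, start func() error, lock func() (done func(error), err error)) error {
	done, err := lock() // phase 1: covers Stop/Shutdown
	if err != nil {
		return err
	}
	if err := stop(); err != nil {
		done(err)
		return err
	}

	done, err = lock() // phase 2: a new lock covers Start
	if err != nil {
		return err
	}
	if err := start(); err != nil {
		done(err)
		return err
	}
	return nil
}

func main() {
	lock := func() (func(error), error) {
		fmt.Println(`created "restart" operation`)
		return func(err error) { fmt.Println("done:", err) }, nil
	}
	_ = restartSketch(
		func() error { fmt.Println("stopping"); return nil },
		func() error { fmt.Println("starting"); return nil },
		lock,
	)
}
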
From 619f8b327a170eb466ed710b76c3e53ea7b2042a Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Wed, 25 Nov 2020 19:51:10 -0500
Subject: [PATCH 6/6] Makefile: Fix golint URL
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index abf7d2d46a..592ada97dc 100644
--- a/Makefile
+++ b/Makefile
@@ -137,7 +137,7 @@ endif
check: default
go get -v -x github.com/rogpeppe/godeps
go get -v -x github.com/tsenart/deadcode
- go get -v -x github.com/golang/lint/golint
+ go get -v -x golang.org/x/lint/golint
go test -v -tags "$(TAG_SQLITE3)" $(DEBUG) ./...
cd test && ./main.sh