[lxc-devel] [lxd/master] Graceful shutdown
monstermunchkin on Github
lxc-bot at linuxcontainers.org
Tue Jul 7 06:32:55 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200706/0df3f737/attachment.bin>
-------------- next part --------------
From c590810a44e25aea2dcb0b5efc01642af5a1fec6 Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Mon, 6 Jul 2020 16:17:54 +0200
Subject: [PATCH 1/6] lxd/main_daemon: s/containers/instances/
This changes the shutdown log message to use "instances" instead of
"containers".
Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
lxd/main_daemon.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lxd/main_daemon.go b/lxd/main_daemon.go
index e6ade1aac0..65782ff201 100644
--- a/lxd/main_daemon.go
+++ b/lxd/main_daemon.go
@@ -80,7 +80,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
select {
case sig := <-ch:
if sig == unix.SIGPWR {
- logger.Infof("Received '%s signal', shutting down containers", sig)
+ logger.Infof("Received '%s signal', shutting down instances", sig)
d.Kill()
containersShutdown(s)
networkShutdown(s)
@@ -90,7 +90,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
}
case <-d.shutdownChan:
- logger.Infof("Asked to shutdown by API, shutting down containers")
+ logger.Infof("Asked to shutdown by API, shutting down instances")
d.Kill()
containersShutdown(s)
networkShutdown(s)
From a56b71de165469ad08afcc18ddb5be3247cba616 Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Mon, 6 Jul 2020 16:20:01 +0200
Subject: [PATCH 2/6] lxd: s/containersShutdown/instancesShutdown/
This renames the function containersShutdown() to instancesShutdown().
Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
lxd/daemon.go | 2 +-
lxd/instances.go | 2 +-
lxd/main_daemon.go | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/lxd/daemon.go b/lxd/daemon.go
index f04ceb58dd..aec7e6f11b 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -856,7 +856,7 @@ func (d *Daemon) init() error {
logger.Debugf("Restarting all the containers following directory rename")
s := d.State()
- containersShutdown(s)
+ instancesShutdown(s)
containersRestart(s)
}
diff --git a/lxd/instances.go b/lxd/instances.go
index 900d938d73..9bb2272725 100644
--- a/lxd/instances.go
+++ b/lxd/instances.go
@@ -313,7 +313,7 @@ func containersOnDisk() (map[string][]string, error) {
return containers, nil
}
-func containersShutdown(s *state.State) error {
+func instancesShutdown(s *state.State) error {
var wg sync.WaitGroup
dbAvailable := true
diff --git a/lxd/main_daemon.go b/lxd/main_daemon.go
index 65782ff201..551a526f6c 100644
--- a/lxd/main_daemon.go
+++ b/lxd/main_daemon.go
@@ -82,7 +82,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
if sig == unix.SIGPWR {
logger.Infof("Received '%s signal', shutting down instances", sig)
d.Kill()
- containersShutdown(s)
+ instancesShutdown(s)
networkShutdown(s)
} else {
logger.Infof("Received '%s signal', exiting", sig)
@@ -92,7 +92,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
case <-d.shutdownChan:
logger.Infof("Asked to shutdown by API, shutting down instances")
d.Kill()
- containersShutdown(s)
+ instancesShutdown(s)
networkShutdown(s)
}
From abbd4b4b7c4c6286de72f3eb8d1ed11c57d42d49 Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Tue, 7 Jul 2020 08:21:15 +0200
Subject: [PATCH 3/6] lxd: Add shutdown indicator to state
This adds a new boolean to the state which indicates whether or not the
daemon is about to shut down.
Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
lxd/daemon.go | 5 ++++-
lxd/main_init.go | 2 +-
lxd/state/notlinux.go | 3 ++-
lxd/state/state.go | 6 +++++-
lxd/state/testing.go | 2 +-
5 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/lxd/daemon.go b/lxd/daemon.go
index aec7e6f11b..2b68b9a02b 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -355,7 +355,10 @@ func writeMacaroonsRequiredResponse(b *identchecker.Bakery, r *http.Request, w h
// State creates a new State instance linked to our internal db and os.
func (d *Daemon) State() *state.State {
- return state.NewState(d.db, d.cluster, d.maas, d.os, d.endpoints, d.events, d.devlxdEvents, d.firewall, d.proxy)
+ // If the daemon is shutting down the shutdown channel will be nil.
+ // This information should be available in the state in order to prevent
+ // new operations from starting.
+ return state.NewState(d.db, d.cluster, d.maas, d.os, d.endpoints, d.events, d.devlxdEvents, d.firewall, d.proxy, d.shutdownChan == nil)
}
// UnixSocket returns the full path to the unix.socket file that this daemon is
diff --git a/lxd/main_init.go b/lxd/main_init.go
index 4c92d51d7f..cfee569faa 100644
--- a/lxd/main_init.go
+++ b/lxd/main_init.go
@@ -165,7 +165,7 @@ func (c *cmdInit) availableStorageDrivers(poolType string) []string {
}
// Get info for supported drivers.
- s := state.NewState(nil, nil, nil, sys.DefaultOS(), nil, nil, nil, nil, nil)
+ s := state.NewState(nil, nil, nil, sys.DefaultOS(), nil, nil, nil, nil, nil, false)
supportedDrivers := storageDrivers.SupportedDrivers(s)
drivers := make([]string, 0, len(supportedDrivers))
diff --git a/lxd/state/notlinux.go b/lxd/state/notlinux.go
index 7111219f65..31b218ef0e 100644
--- a/lxd/state/notlinux.go
+++ b/lxd/state/notlinux.go
@@ -8,5 +8,6 @@ import (
// State here is just an empty shim to statisfy dependencies.
type State struct {
- Events *events.Server
+ Events *events.Server
+ Closing bool
}
diff --git a/lxd/state/state.go b/lxd/state/state.go
index cedce5b884..6d7fef1ea6 100644
--- a/lxd/state/state.go
+++ b/lxd/state/state.go
@@ -38,11 +38,14 @@ type State struct {
// Firewall instance
Firewall firewall.Firewall
+
+ // True if daemon is shutting down
+ Closing bool
}
// NewState returns a new State object with the given database and operating
// system components.
-func NewState(node *db.Node, cluster *db.Cluster, maas *maas.Controller, os *sys.OS, endpoints *endpoints.Endpoints, events *events.Server, devlxdEvents *events.Server, firewall firewall.Firewall, proxy func(req *http.Request) (*url.URL, error)) *State {
+func NewState(node *db.Node, cluster *db.Cluster, maas *maas.Controller, os *sys.OS, endpoints *endpoints.Endpoints, events *events.Server, devlxdEvents *events.Server, firewall firewall.Firewall, proxy func(req *http.Request) (*url.URL, error), closing bool) *State {
return &State{
Node: node,
Cluster: cluster,
@@ -53,5 +56,6 @@ func NewState(node *db.Node, cluster *db.Cluster, maas *maas.Controller, os *sys
Events: events,
Firewall: firewall,
Proxy: proxy,
+ Closing: closing,
}
}
diff --git a/lxd/state/testing.go b/lxd/state/testing.go
index b0bc97fed5..c957fee0ed 100644
--- a/lxd/state/testing.go
+++ b/lxd/state/testing.go
@@ -26,7 +26,7 @@ func NewTestState(t *testing.T) (*State, func()) {
osCleanup()
}
- state := NewState(node, cluster, nil, os, nil, nil, nil, firewall.New(), nil)
+ state := NewState(node, cluster, nil, os, nil, nil, nil, firewall.New(), nil, false)
return state, cleanup
}
From 51562d7af2d9cd0711c19d37fa9e616ff284828b Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Tue, 7 Jul 2020 08:23:43 +0200
Subject: [PATCH 4/6] lxd/operations: Add db operation type to Operation
This stores the db operation type on Operation and exposes it through a
Type() accessor so that callers can check it.
Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
lxd/operations/operations.go | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/lxd/operations/operations.go b/lxd/operations/operations.go
index 787382bb82..db815960f4 100644
--- a/lxd/operations/operations.go
+++ b/lxd/operations/operations.go
@@ -93,6 +93,7 @@ type Operation struct {
canceler *cancel.Canceler
description string
permission string
+ dbOpType db.OperationType
// Those functions are called at various points in the Operation lifecycle
onRun func(*Operation) error
@@ -112,12 +113,18 @@ type Operation struct {
// OperationCreate creates a new operation and returns it. If it cannot be
// created, it returns an error.
func OperationCreate(s *state.State, project string, opClass operationClass, opType db.OperationType, opResources map[string][]string, opMetadata interface{}, onRun func(*Operation) error, onCancel func(*Operation) error, onConnect func(*Operation, *http.Request, http.ResponseWriter) error) (*Operation, error) {
+ // Don't allow new operations when LXD is shutting down.
+ if s.Closing {
+ return nil, fmt.Errorf("LXD is shutting down")
+ }
+
// Main attributes
op := Operation{}
op.project = project
op.id = uuid.NewRandom().String()
op.description = opType.Description()
op.permission = opType.Permission()
+ op.dbOpType = opType
op.class = opClass
op.createdAt = time.Now()
op.updatedAt = op.createdAt
@@ -546,3 +553,8 @@ func (op *Operation) Project() string {
func (op *Operation) Status() api.StatusCode {
return op.status
}
+
+// Type returns the db operation type.
+func (op *Operation) Type() db.OperationType {
+ return op.dbOpType
+}
From 5c071136da9fba044a289f4d092529c282e7e5b2 Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Tue, 7 Jul 2020 08:29:48 +0200
Subject: [PATCH 5/6] lxd: Add waitForOperations()
This function checks for running operations every second and returns as
soon as none are left. If there are still exec/console operations after
10 minutes, the function returns and LXD proceeds with the shutdown.
Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
lxd/operations.go | 45 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
diff --git a/lxd/operations.go b/lxd/operations.go
index 7434be4be3..df6db6685a 100644
--- a/lxd/operations.go
+++ b/lxd/operations.go
@@ -4,6 +4,7 @@ import (
"fmt"
"net/http"
"strings"
+ "time"
"github.com/gorilla/mux"
"github.com/gorilla/websocket"
@@ -17,6 +18,7 @@ import (
"github.com/lxc/lxd/lxd/util"
"github.com/lxc/lxd/shared"
"github.com/lxc/lxd/shared/api"
+ "github.com/lxc/lxd/shared/logger"
)
var operationCmd = APIEndpoint{
@@ -44,6 +46,49 @@ var operationWebsocket = APIEndpoint{
Get: APIEndpointAction{Handler: operationWebsocketGet, AllowUntrusted: true},
}
+func waitForOperations() {
+ timeout := time.After(10 * time.Minute)
+ tick := time.Tick(time.Second)
+ logTick := time.Tick(time.Minute)
+
+ for {
+ <-tick
+
+ // Get all the operations
+ operations.Lock()
+ ops := operations.Operations()
+ operations.Unlock()
+
+ // No more operations left. Exit function.
+ if len(ops) == 0 {
+ return
+ }
+
+ count := 0
+
+ for _, op := range ops {
+ opType := op.Type()
+
+ if opType == db.OperationCommandExec || opType == db.OperationConsoleShow {
+ count++
+ }
+ }
+
+ select {
+ case <-timeout:
+ // We wait up to 10 minutes for exec/console operations to finish.
+ // We don't need to explicitly kill them (they cannot be cancelled anyway), as they will
+ // be automatically terminated once the daemon exits.
+ logger.Info("Shutdown timeout reached")
+ return
+ case <-logTick:
+ // Print log message every minute.
+ logger.Infof("Waiting for %d exec/console operation(s) to finish", count)
+ default:
+ }
+ }
+}
+
// API functions
func operationGet(d *Daemon, r *http.Request) response.Response {
id := mux.Vars(r)["id"]
From 46d3f617a0a7fa98eb49b779af6ac0a5914139bb Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Tue, 7 Jul 2020 08:31:08 +0200
Subject: [PATCH 6/6] lxd: Shut down gracefully
This allows the daemon to shut down gracefully. That is, it waits up to
10 minutes for exec/console operations to finish and then proceeds with
the shutdown.
Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
lxd/main_daemon.go | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/lxd/main_daemon.go b/lxd/main_daemon.go
index 551a526f6c..e6daa650eb 100644
--- a/lxd/main_daemon.go
+++ b/lxd/main_daemon.go
@@ -90,7 +90,12 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
}
case <-d.shutdownChan:
- logger.Infof("Asked to shutdown by API, shutting down instances")
+ // Close the shutdown channel and set it to nil. This way, other functions will know that
+ // we're about to shut down.
+ close(d.shutdownChan)
+ d.shutdownChan = nil
+ logger.Infof("Asked to shutdown by API, waiting for operations to finish")
+ waitForOperations()
d.Kill()
instancesShutdown(s)
networkShutdown(s)
More information about the lxc-devel
mailing list