[lxc-devel] [lxd/master] Graceful shutdown

monstermunchkin on Github lxc-bot at linuxcontainers.org
Tue Jul 7 06:32:55 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200706/0df3f737/attachment.bin>
-------------- next part --------------
From c590810a44e25aea2dcb0b5efc01642af5a1fec6 Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Mon, 6 Jul 2020 16:17:54 +0200
Subject: [PATCH 1/6] lxd/main_daemon: s/containers/instances/

This changes the shutdown log message to use "instances" instead of
"containers".

Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
 lxd/main_daemon.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lxd/main_daemon.go b/lxd/main_daemon.go
index e6ade1aac0..65782ff201 100644
--- a/lxd/main_daemon.go
+++ b/lxd/main_daemon.go
@@ -80,7 +80,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
 	select {
 	case sig := <-ch:
 		if sig == unix.SIGPWR {
-			logger.Infof("Received '%s signal', shutting down containers", sig)
+			logger.Infof("Received '%s signal', shutting down instances", sig)
 			d.Kill()
 			containersShutdown(s)
 			networkShutdown(s)
@@ -90,7 +90,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
 		}
 
 	case <-d.shutdownChan:
-		logger.Infof("Asked to shutdown by API, shutting down containers")
+		logger.Infof("Asked to shutdown by API, shutting down instances")
 		d.Kill()
 		containersShutdown(s)
 		networkShutdown(s)

From a56b71de165469ad08afcc18ddb5be3247cba616 Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Mon, 6 Jul 2020 16:20:01 +0200
Subject: [PATCH 2/6] lxd: s/containersShutdown/instancesShutdown/

This renames the function containersShutdown() to instancesShutdown().

Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
 lxd/daemon.go      | 2 +-
 lxd/instances.go   | 2 +-
 lxd/main_daemon.go | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lxd/daemon.go b/lxd/daemon.go
index f04ceb58dd..aec7e6f11b 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -856,7 +856,7 @@ func (d *Daemon) init() error {
 
 		logger.Debugf("Restarting all the containers following directory rename")
 		s := d.State()
-		containersShutdown(s)
+		instancesShutdown(s)
 		containersRestart(s)
 	}
 
diff --git a/lxd/instances.go b/lxd/instances.go
index 900d938d73..9bb2272725 100644
--- a/lxd/instances.go
+++ b/lxd/instances.go
@@ -313,7 +313,7 @@ func containersOnDisk() (map[string][]string, error) {
 	return containers, nil
 }
 
-func containersShutdown(s *state.State) error {
+func instancesShutdown(s *state.State) error {
 	var wg sync.WaitGroup
 
 	dbAvailable := true
diff --git a/lxd/main_daemon.go b/lxd/main_daemon.go
index 65782ff201..551a526f6c 100644
--- a/lxd/main_daemon.go
+++ b/lxd/main_daemon.go
@@ -82,7 +82,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
 		if sig == unix.SIGPWR {
 			logger.Infof("Received '%s signal', shutting down instances", sig)
 			d.Kill()
-			containersShutdown(s)
+			instancesShutdown(s)
 			networkShutdown(s)
 		} else {
 			logger.Infof("Received '%s signal', exiting", sig)
@@ -92,7 +92,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
 	case <-d.shutdownChan:
 		logger.Infof("Asked to shutdown by API, shutting down instances")
 		d.Kill()
-		containersShutdown(s)
+		instancesShutdown(s)
 		networkShutdown(s)
 	}
 

From abbd4b4b7c4c6286de72f3eb8d1ed11c57d42d49 Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Tue, 7 Jul 2020 08:21:15 +0200
Subject: [PATCH 3/6] lxd: Add shutdown indicator to state

This adds a new boolean to the state which indicates whether or not the
daemon is about to shut down.

Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
 lxd/daemon.go         | 5 ++++-
 lxd/main_init.go      | 2 +-
 lxd/state/notlinux.go | 3 ++-
 lxd/state/state.go    | 6 +++++-
 lxd/state/testing.go  | 2 +-
 5 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/lxd/daemon.go b/lxd/daemon.go
index aec7e6f11b..2b68b9a02b 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -355,7 +355,10 @@ func writeMacaroonsRequiredResponse(b *identchecker.Bakery, r *http.Request, w h
 
 // State creates a new State instance linked to our internal db and os.
 func (d *Daemon) State() *state.State {
-	return state.NewState(d.db, d.cluster, d.maas, d.os, d.endpoints, d.events, d.devlxdEvents, d.firewall, d.proxy)
+	// If the daemon is shutting down the shutdown channel will be nil.
+	// This information should be available in the state in order to prevent
+	// new operations from starting.
+	return state.NewState(d.db, d.cluster, d.maas, d.os, d.endpoints, d.events, d.devlxdEvents, d.firewall, d.proxy, d.shutdownChan == nil)
 }
 
 // UnixSocket returns the full path to the unix.socket file that this daemon is
diff --git a/lxd/main_init.go b/lxd/main_init.go
index 4c92d51d7f..cfee569faa 100644
--- a/lxd/main_init.go
+++ b/lxd/main_init.go
@@ -165,7 +165,7 @@ func (c *cmdInit) availableStorageDrivers(poolType string) []string {
 	}
 
 	// Get info for supported drivers.
-	s := state.NewState(nil, nil, nil, sys.DefaultOS(), nil, nil, nil, nil, nil)
+	s := state.NewState(nil, nil, nil, sys.DefaultOS(), nil, nil, nil, nil, nil, false)
 	supportedDrivers := storageDrivers.SupportedDrivers(s)
 
 	drivers := make([]string, 0, len(supportedDrivers))
diff --git a/lxd/state/notlinux.go b/lxd/state/notlinux.go
index 7111219f65..31b218ef0e 100644
--- a/lxd/state/notlinux.go
+++ b/lxd/state/notlinux.go
@@ -8,5 +8,6 @@ import (
 
 // State here is just an empty shim to statisfy dependencies.
 type State struct {
-	Events *events.Server
+	Events  *events.Server
+	Closing bool
 }
diff --git a/lxd/state/state.go b/lxd/state/state.go
index cedce5b884..6d7fef1ea6 100644
--- a/lxd/state/state.go
+++ b/lxd/state/state.go
@@ -38,11 +38,14 @@ type State struct {
 
 	// Firewall instance
 	Firewall firewall.Firewall
+
+	// True if daemon is shutting down
+	Closing bool
 }
 
 // NewState returns a new State object with the given database and operating
 // system components.
-func NewState(node *db.Node, cluster *db.Cluster, maas *maas.Controller, os *sys.OS, endpoints *endpoints.Endpoints, events *events.Server, devlxdEvents *events.Server, firewall firewall.Firewall, proxy func(req *http.Request) (*url.URL, error)) *State {
+func NewState(node *db.Node, cluster *db.Cluster, maas *maas.Controller, os *sys.OS, endpoints *endpoints.Endpoints, events *events.Server, devlxdEvents *events.Server, firewall firewall.Firewall, proxy func(req *http.Request) (*url.URL, error), closing bool) *State {
 	return &State{
 		Node:         node,
 		Cluster:      cluster,
@@ -53,5 +56,6 @@ func NewState(node *db.Node, cluster *db.Cluster, maas *maas.Controller, os *sys
 		Events:       events,
 		Firewall:     firewall,
 		Proxy:        proxy,
+		Closing:      closing,
 	}
 }
diff --git a/lxd/state/testing.go b/lxd/state/testing.go
index b0bc97fed5..c957fee0ed 100644
--- a/lxd/state/testing.go
+++ b/lxd/state/testing.go
@@ -26,7 +26,7 @@ func NewTestState(t *testing.T) (*State, func()) {
 		osCleanup()
 	}
 
-	state := NewState(node, cluster, nil, os, nil, nil, nil, firewall.New(), nil)
+	state := NewState(node, cluster, nil, os, nil, nil, nil, firewall.New(), nil, false)
 
 	return state, cleanup
 }

From 51562d7af2d9cd0711c19d37fa9e616ff284828b Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Tue, 7 Jul 2020 08:23:43 +0200
Subject: [PATCH 4/6] lxd/operations: Add db operation type to Operation

This adds the db operation type to Operation which then can be checked.

Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
 lxd/operations/operations.go | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/lxd/operations/operations.go b/lxd/operations/operations.go
index 787382bb82..db815960f4 100644
--- a/lxd/operations/operations.go
+++ b/lxd/operations/operations.go
@@ -93,6 +93,7 @@ type Operation struct {
 	canceler    *cancel.Canceler
 	description string
 	permission  string
+	dbOpType    db.OperationType
 
 	// Those functions are called at various points in the Operation lifecycle
 	onRun     func(*Operation) error
@@ -112,12 +113,18 @@ type Operation struct {
 // OperationCreate creates a new operation and returns it. If it cannot be
 // created, it returns an error.
 func OperationCreate(s *state.State, project string, opClass operationClass, opType db.OperationType, opResources map[string][]string, opMetadata interface{}, onRun func(*Operation) error, onCancel func(*Operation) error, onConnect func(*Operation, *http.Request, http.ResponseWriter) error) (*Operation, error) {
+	// Don't allow new operations when LXD is shutting down.
+	if s.Closing {
+		return nil, fmt.Errorf("LXD is shutting down")
+	}
+
 	// Main attributes
 	op := Operation{}
 	op.project = project
 	op.id = uuid.NewRandom().String()
 	op.description = opType.Description()
 	op.permission = opType.Permission()
+	op.dbOpType = opType
 	op.class = opClass
 	op.createdAt = time.Now()
 	op.updatedAt = op.createdAt
@@ -546,3 +553,8 @@ func (op *Operation) Project() string {
 func (op *Operation) Status() api.StatusCode {
 	return op.status
 }
+
+// Type returns the db operation type.
+func (op *Operation) Type() db.OperationType {
+	return op.dbOpType
+}

From 5c071136da9fba044a289f4d092529c282e7e5b2 Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Tue, 7 Jul 2020 08:29:48 +0200
Subject: [PATCH 5/6] lxd: Add waitForOperations()

This function checks for running operations every second. If there are
still exec/console operations after 10 minutes, the function returns
and LXD will proceed with the shutdown.

Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
 lxd/operations.go | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/lxd/operations.go b/lxd/operations.go
index 7434be4be3..df6db6685a 100644
--- a/lxd/operations.go
+++ b/lxd/operations.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"net/http"
 	"strings"
+	"time"
 
 	"github.com/gorilla/mux"
 	"github.com/gorilla/websocket"
@@ -17,6 +18,7 @@ import (
 	"github.com/lxc/lxd/lxd/util"
 	"github.com/lxc/lxd/shared"
 	"github.com/lxc/lxd/shared/api"
+	"github.com/lxc/lxd/shared/logger"
 )
 
 var operationCmd = APIEndpoint{
@@ -44,6 +46,49 @@ var operationWebsocket = APIEndpoint{
 	Get: APIEndpointAction{Handler: operationWebsocketGet, AllowUntrusted: true},
 }
 
+func waitForOperations() {
+	timeout := time.After(10 * time.Minute)
+	tick := time.Tick(time.Second)
+	logTick := time.Tick(time.Minute)
+
+	for {
+		<-tick
+
+		// Get all the operations
+		operations.Lock()
+		ops := operations.Operations()
+		operations.Unlock()
+
+		// No more operations left. Exit function.
+		if len(ops) == 0 {
+			return
+		}
+
+		count := 0
+
+		for _, op := range ops {
+			opType := op.Type()
+
+			if opType == db.OperationCommandExec || opType == db.OperationConsoleShow {
+				count++
+			}
+		}
+
+		select {
+		case <-timeout:
+			// We wait up to 10 minutes for exec/console operations to finish.
+		// We don't need to explicitly kill them (they cannot be cancelled anyway), as they will
+			// be automatically terminated once the daemon exits.
+			logger.Info("Shutdown timeout reached")
+			return
+		case <-logTick:
+			// Print log message every minute.
+			logger.Infof("Waiting for %d exec/console operation(s) to finish", count)
+		default:
+		}
+	}
+}
+
 // API functions
 func operationGet(d *Daemon, r *http.Request) response.Response {
 	id := mux.Vars(r)["id"]

From 46d3f617a0a7fa98eb49b779af6ac0a5914139bb Mon Sep 17 00:00:00 2001
From: Thomas Hipp <thomas.hipp at canonical.com>
Date: Tue, 7 Jul 2020 08:31:08 +0200
Subject: [PATCH 6/6] lxd: Shut down gracefully

This allows the daemon to shut down gracefully. That is, it waits up to
10 minutes for exec/console operations to finish and then proceeds with
the shutdown.

Signed-off-by: Thomas Hipp <thomas.hipp at canonical.com>
---
 lxd/main_daemon.go | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lxd/main_daemon.go b/lxd/main_daemon.go
index 551a526f6c..e6daa650eb 100644
--- a/lxd/main_daemon.go
+++ b/lxd/main_daemon.go
@@ -90,7 +90,12 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
 		}
 
 	case <-d.shutdownChan:
-		logger.Infof("Asked to shutdown by API, shutting down instances")
+		// Close the shutdown channel and set it to nil. This way, other functions will know that
+		// we're about to shut down.
+		close(d.shutdownChan)
+		d.shutdownChan = nil
+		logger.Infof("Asked to shutdown by API, waiting for operations to finish")
+		waitForOperations()
 		d.Kill()
 		instancesShutdown(s)
 		networkShutdown(s)


More information about the lxc-devel mailing list