[lxc-devel] [lxd/master] Fix cluster upgrade deadlocks

stgraber on Github lxc-bot at linuxcontainers.org
Wed Oct 9 21:35:14 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20191009/d2c219bd/attachment-0001.bin>
-------------- next part --------------
From 52e0b4d19e0fd7ffe38dbc86d55a9ec989547897 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Tue, 8 Oct 2019 01:11:20 -0400
Subject: [PATCH 1/6] lxd/cluster: Process upgrade notifications on all members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/cluster/gateway.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go
index 1b83722abb..f16705ec43 100644
--- a/lxd/cluster/gateway.go
+++ b/lxd/cluster/gateway.go
@@ -221,12 +221,6 @@ func (g *Gateway) HandlerFuncs(nodeRefreshTask func(*APIHeartbeat)) map[string]h
 			return
 		}
 
-		// From here on we require that this node is part of the raft cluster.
-		if g.server == nil || g.memoryDial != nil {
-			http.NotFound(w, r)
-			return
-		}
-
 		// Handle database upgrade notifications.
 		if r.Method == "PATCH" {
 			select {
@@ -236,6 +230,12 @@ func (g *Gateway) HandlerFuncs(nodeRefreshTask func(*APIHeartbeat)) map[string]h
 			return
 		}
 
+		// From here on we require that this node is part of the raft cluster.
+		if g.server == nil || g.memoryDial != nil {
+			http.NotFound(w, r)
+			return
+		}
+
 		// Before actually establishing the connection, our dialer
 		// probes the node to see if it's currently the leader
 		// (otherwise it tries with another node or retry later).

From 5026fd2b0b49f31a8360e77137f81a7570d556f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Wed, 9 Oct 2019 16:33:00 -0400
Subject: [PATCH 2/6] lxd/cluster: Relax upgrade notification target
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This changes the logic used to broadcast a successful database state to
the other cluster members during startup.

The previous logic was using NotifyAll which fails if any one of the
other members is considered offline and as there is no retry for this
notification logic, this can lead to the entire cluster deadlocking
during upgrade.

This change now introduces NotifyTryAll which will attempt to send the
notification message to all cluster members, regardless of their
expected state. The assumption is that all memers currently online
should get the message and the rest will notice the database being ready
by the time they start.

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/cluster/notify.go  | 7 +++++--
 lxd/cluster/upgrade.go | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lxd/cluster/notify.go b/lxd/cluster/notify.go
index f360719402..efed432b62 100644
--- a/lxd/cluster/notify.go
+++ b/lxd/cluster/notify.go
@@ -24,8 +24,9 @@ type NotifierPolicy int
 
 // Possible notifcation policies.
 const (
-	NotifyAll   NotifierPolicy = iota // Requires that all nodes are up.
-	NotifyAlive                       // Only notifies nodes that are alive
+	NotifyAll    NotifierPolicy = iota // Requires that all nodes are up.
+	NotifyAlive                        // Only notifies nodes that are alive
+	NotifyTryAll                       // Attempt to notify all nodes regardless of state.
 )
 
 // NewNotifier builds a Notifier that can be used to notify other peers using
@@ -57,12 +58,14 @@ func NewNotifier(state *state.State, cert *shared.CertInfo, policy NotifierPolic
 			if node.Address == address || node.Address == "0.0.0.0" {
 				continue // Exclude ourselves
 			}
+
 			if node.IsOffline(offlineThreshold) {
 				switch policy {
 				case NotifyAll:
 					return fmt.Errorf("peer node %s is down", node.Address)
 				case NotifyAlive:
 					continue // Just skip this node
+				case NotifyTryAll:
 				}
 			}
 			peers = append(peers, node.Address)
diff --git a/lxd/cluster/upgrade.go b/lxd/cluster/upgrade.go
index b7eabc2424..641f6ac9bf 100644
--- a/lxd/cluster/upgrade.go
+++ b/lxd/cluster/upgrade.go
@@ -18,10 +18,11 @@ import (
 // nodes which was waiting for this node to be upgraded should re-check if it's
 // okay to move forward.
 func NotifyUpgradeCompleted(state *state.State, cert *shared.CertInfo) error {
-	notifier, err := NewNotifier(state, cert, NotifyAll)
+	notifier, err := NewNotifier(state, cert, NotifyTryAll)
 	if err != nil {
 		return err
 	}
+
 	return notifier(func(client lxd.InstanceServer) error {
 		info, err := client.GetConnectionInfo()
 		if err != nil {

From 3fc077345762837a430a84740a1fed0109d41dd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Wed, 9 Oct 2019 17:23:18 -0400
Subject: [PATCH 3/6] lxd/db: Export GetNodeID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/db/db.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lxd/db/db.go b/lxd/db/db.go
index 3c769fd623..4799102aac 100644
--- a/lxd/db/db.go
+++ b/lxd/db/db.go
@@ -341,6 +341,11 @@ func (c *Cluster) SetDefaultTimeout(timeout time.Duration) {
 	driver.SetContextTimeout(timeout)
 }
 
+// GetNodeID returns the current nodeID (0 if not set)
+func (c *Cluster) GetNodeID() int64 {
+	return c.nodeID
+}
+
 // Transaction creates a new ClusterTx object and transactionally executes the
 // cluster database interactions invoked by the given function. If the function
 // returns no error, all database changes are committed to the cluster database

From 7010eb2f19a58fda9accb7e59ade1cc6f18ee320 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Wed, 9 Oct 2019 17:24:02 -0400
Subject: [PATCH 4/6] lxd/daemon: Skip heartbeat processing during startup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/daemon.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lxd/daemon.go b/lxd/daemon.go
index 0ffd5235fe..2ee64d48ba 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -1372,6 +1372,11 @@ func (d *Daemon) hasNodeListChanged(heartbeatData *cluster.APIHeartbeat) bool {
 // NodeRefreshTask is run each time a fresh node is generated.
 // This can be used to trigger actions when the node list changes.
 func (d *Daemon) NodeRefreshTask(heartbeatData *cluster.APIHeartbeat) {
+	// Don't process the heartbeat until we're fully online
+	if d.cluster == nil || d.cluster.GetNodeID() == 0 {
+		return
+	}
+
 	// If the max version of the cluster has changed, check whether we need to upgrade.
 	if d.lastNodeList == nil || d.lastNodeList.Version.APIExtensions != heartbeatData.Version.APIExtensions || d.lastNodeList.Version.Schema != heartbeatData.Version.Schema {
 		err := cluster.MaybeUpdate(d.State())

From 3d217085e614f5c74ae043c8b55bee85b18f262c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Wed, 9 Oct 2019 17:24:46 -0400
Subject: [PATCH 5/6] lxd/db: Backward compat code for Nodes()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/db/node.go | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/lxd/db/node.go b/lxd/db/node.go
index 667007f6a4..a14b325667 100644
--- a/lxd/db/node.go
+++ b/lxd/db/node.go
@@ -222,28 +222,32 @@ func (c *ClusterTx) nodes(pending bool, where string, args ...interface{}) ([]No
 	// Get node roles
 	sql := "SELECT node_id, role FROM nodes_roles;"
 
+	nodeRoles := map[int64][]string{}
 	rows, err := c.tx.Query(sql)
 	if err != nil {
-		return nil, err
-	}
-	defer rows.Close()
-
-	nodeRoles := map[int64][]string{}
-	for i := 0; rows.Next(); i++ {
-		var nodeID int64
-		var role int
-		err := rows.Scan(&nodeID, &role)
-		if err != nil {
+		if err.Error() != "no such table: nodes_roles" {
 			return nil, err
 		}
+	} else {
+		// Don't fail on a missing table, we need to handle updates
+		defer rows.Close()
 
-		if nodeRoles[nodeID] == nil {
-			nodeRoles[nodeID] = []string{}
-		}
+		for i := 0; rows.Next(); i++ {
+			var nodeID int64
+			var role int
+			err := rows.Scan(&nodeID, &role)
+			if err != nil {
+				return nil, err
+			}
 
-		roleName := string(ClusterRoles[role])
+			if nodeRoles[nodeID] == nil {
+				nodeRoles[nodeID] = []string{}
+			}
 
-		nodeRoles[nodeID] = append(nodeRoles[nodeID], roleName)
+			roleName := string(ClusterRoles[role])
+
+			nodeRoles[nodeID] = append(nodeRoles[nodeID], roleName)
+		}
 	}
 
 	err = rows.Err()

From ff87139a5e1c34ae08fc5ca02c41d26d19dc21e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Wed, 9 Oct 2019 17:26:40 -0400
Subject: [PATCH 6/6] lxd/daemon: Set gateway.Cluster during
 WaitUpgradeNotification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/daemon.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lxd/daemon.go b/lxd/daemon.go
index 2ee64d48ba..7c7bba6573 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -741,9 +741,11 @@ func (d *Daemon) init() error {
 			// The only thing we want to still do on this node is
 			// to run the heartbeat task, in case we are the raft
 			// leader.
+			d.gateway.Cluster = d.cluster
 			stop, _ := task.Start(cluster.HeartbeatTask(d.gateway))
 			d.gateway.WaitUpgradeNotification()
 			stop(time.Second)
+			d.gateway.Cluster = nil
 
 			d.cluster.Close()
 


More information about the lxc-devel mailing list