[lxc-devel] [lxd/master] Fix misleading nodes are behind error message

freeekanayaka on Github lxc-bot at linuxcontainers.org
Mon Jun 29 11:07:03 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 503 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200629/d68180ea/attachment.bin>
-------------- next part --------------
From aa1130d3cb8ceae1d20a733fe475763d500d0226 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Mon, 29 Jun 2020 11:58:25 +0200
Subject: [PATCH 1/2] lxd/cluster: Always check for dqlite protocol version
 mismatches

The check on the client side was being performed only for HEAD requests.

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/cluster/gateway.go | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go
index 10de37a48b..9280800440 100644
--- a/lxd/cluster/gateway.go
+++ b/lxd/cluster/gateway.go
@@ -890,20 +890,6 @@ func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway, checkLeader
 			return nil, err
 		}
 
-		// If the remote server has detected that we are out of date, let's
-		// trigger an upgrade.
-		if response.StatusCode == http.StatusUpgradeRequired {
-			g.lock.Lock()
-			defer g.lock.Unlock()
-			if !g.upgradeTriggered {
-				err = triggerUpdate()
-				if err == nil {
-					g.upgradeTriggered = true
-				}
-			}
-			return nil, fmt.Errorf("Upgrade needed")
-		}
-
 		// If the endpoint does not exists, it means that the target node is
 		// running version 1 of dqlite protocol. In that case we simply behave
 		// as the node was at an older LXD version.
@@ -951,6 +937,21 @@ func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway, checkLeader
 	if err != nil {
 		return nil, errors.Wrap(err, "Failed to read response")
 	}
+
+	// If the remote server has detected that we are out of date, let's
+	// trigger an upgrade.
+	if response.StatusCode == http.StatusUpgradeRequired {
+		g.lock.Lock()
+		defer g.lock.Unlock()
+		if !g.upgradeTriggered {
+			err = triggerUpdate()
+			if err == nil {
+				g.upgradeTriggered = true
+			}
+		}
+		return nil, fmt.Errorf("Upgrade needed")
+	}
+
 	if response.StatusCode != http.StatusSwitchingProtocols {
 		return nil, fmt.Errorf("Dialing failed: expected status code 101 got %d", response.StatusCode)
 	}

From 37fa0d026640df6074fd1973f3d169d55f8d7ab9 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Mon, 29 Jun 2020 12:25:54 +0200
Subject: [PATCH 2/2] lxd/cluster: Don't run unncessary HEAD probe upon dqlite
 connections

This is not needed anymore now that all nodes run the dqlite engine

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/cluster/gateway.go | 60 +++++++++++++-----------------------------
 1 file changed, 19 insertions(+), 41 deletions(-)

diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go
index 9280800440..42c2ae3eee 100644
--- a/lxd/cluster/gateway.go
+++ b/lxd/cluster/gateway.go
@@ -261,9 +261,11 @@ func (g *Gateway) HandlerFuncs(nodeRefreshTask func(*APIHeartbeat)) map[string]h
 			return
 		}
 
-		// Before actually establishing the connection, our dialer
-		// probes the node to see if it's currently the leader
-		// (otherwise it tries with another node or retry later).
+		// NOTE: this is kept for backward compatibility when upgrading
+		// a cluster with version <= 4.2.
+		//
+		// Once all nodes are on >= 4.3 this code is effectively
+		// unused.
 		if r.Method == "HEAD" {
 			// We can safely know about current leader only if we are a voter.
 			if g.info.Role != db.RaftVoter {
@@ -383,7 +385,18 @@ func (g *Gateway) DialFunc() client.DialFunc {
 			return g.memoryDial(ctx, address)
 		}
 
-		return dqliteNetworkDial(ctx, address, g, true)
+		conn, err := dqliteNetworkDial(ctx, address, g)
+		if err != nil {
+			return nil, err
+		}
+
+		// We successfully established a connection with the leader. Maybe the
+		// leader is ourselves, and we were recently elected. In that case
+		// trigger a full heartbeat now: it will be a no-op if we aren't
+		// actually leaders.
+		go g.heartbeat(g.ctx, true)
+
+		return conn, nil
 	}
 }
 
@@ -397,7 +410,7 @@ func (g *Gateway) raftDial() client.DialFunc {
 			}
 			address = string(addr)
 		}
-		conn, err := dqliteNetworkDial(ctx, address, g, false)
+		conn, err := dqliteNetworkDial(ctx, address, g)
 		if err != nil {
 			return nil, err
 		}
@@ -867,40 +880,13 @@ func (g *Gateway) raftAddress(databaseID uint64) (string, error) {
 	return address, nil
 }
 
-func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway, checkLeader bool) (net.Conn, error) {
+func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway) (net.Conn, error) {
 	config, err := tlsClientConfig(g.cert)
 	if err != nil {
 		return nil, err
 	}
 
 	path := fmt.Sprintf("https://%s%s", addr, databaseEndpoint)
-	if checkLeader {
-		// Make a probe HEAD request to check if the target node is the leader.
-		request, err := http.NewRequest("HEAD", path, nil)
-		if err != nil {
-			return nil, err
-		}
-		setDqliteVersionHeader(request)
-		request = request.WithContext(ctx)
-		transport, cleanup := tlsTransport(config)
-		defer cleanup()
-		client := &http.Client{Transport: transport}
-		response, err := client.Do(request)
-		if err != nil {
-			return nil, err
-		}
-
-		// If the endpoint does not exists, it means that the target node is
-		// running version 1 of dqlite protocol. In that case we simply behave
-		// as the node was at an older LXD version.
-		if response.StatusCode == http.StatusNotFound {
-			return nil, db.ErrSomeNodesAreBehind
-		}
-
-		if response.StatusCode != http.StatusOK {
-			return nil, fmt.Errorf(response.Status)
-		}
-	}
 
 	// Establish the connection
 	request := &http.Request{
@@ -959,14 +945,6 @@ func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway, checkLeader
 		return nil, fmt.Errorf("Missing or unexpected Upgrade header in response")
 	}
 
-	// We successfully established a connection with the leader. Maybe the
-	// leader is ourselves, and we were recently elected. In that case
-	// trigger a full heartbeat now: it will be a no-op if we aren't
-	// actually leaders.
-	if checkLeader {
-		go g.heartbeat(g.ctx, true)
-	}
-
 	return conn, nil
 }
 


More information about the lxc-devel mailing list