[lxc-devel] [lxd/master] Fix misleading nodes are behind error message
freeekanayaka on Github
lxc-bot at linuxcontainers.org
Mon Jun 29 11:07:03 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 503 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200629/d68180ea/attachment.bin>
-------------- next part --------------
From aa1130d3cb8ceae1d20a733fe475763d500d0226 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Mon, 29 Jun 2020 11:58:25 +0200
Subject: [PATCH 1/2] lxd/cluster: Always check for dqlite protocol version
mismatches
The check on the client side was being performed only for HEAD requests.
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/cluster/gateway.go | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go
index 10de37a48b..9280800440 100644
--- a/lxd/cluster/gateway.go
+++ b/lxd/cluster/gateway.go
@@ -890,20 +890,6 @@ func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway, checkLeader
return nil, err
}
- // If the remote server has detected that we are out of date, let's
- // trigger an upgrade.
- if response.StatusCode == http.StatusUpgradeRequired {
- g.lock.Lock()
- defer g.lock.Unlock()
- if !g.upgradeTriggered {
- err = triggerUpdate()
- if err == nil {
- g.upgradeTriggered = true
- }
- }
- return nil, fmt.Errorf("Upgrade needed")
- }
-
// If the endpoint does not exists, it means that the target node is
// running version 1 of dqlite protocol. In that case we simply behave
// as the node was at an older LXD version.
@@ -951,6 +937,21 @@ func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway, checkLeader
if err != nil {
return nil, errors.Wrap(err, "Failed to read response")
}
+
+ // If the remote server has detected that we are out of date, let's
+ // trigger an upgrade.
+ if response.StatusCode == http.StatusUpgradeRequired {
+ g.lock.Lock()
+ defer g.lock.Unlock()
+ if !g.upgradeTriggered {
+ err = triggerUpdate()
+ if err == nil {
+ g.upgradeTriggered = true
+ }
+ }
+ return nil, fmt.Errorf("Upgrade needed")
+ }
+
if response.StatusCode != http.StatusSwitchingProtocols {
return nil, fmt.Errorf("Dialing failed: expected status code 101 got %d", response.StatusCode)
}
From 37fa0d026640df6074fd1973f3d169d55f8d7ab9 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Mon, 29 Jun 2020 12:25:54 +0200
Subject: [PATCH 2/2] lxd/cluster: Don't run unncessary HEAD probe upon dqlite
connections
This is not needed anymore now that all nodes run the dqlite engine
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/cluster/gateway.go | 60 +++++++++++++-----------------------------
1 file changed, 19 insertions(+), 41 deletions(-)
diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go
index 9280800440..42c2ae3eee 100644
--- a/lxd/cluster/gateway.go
+++ b/lxd/cluster/gateway.go
@@ -261,9 +261,11 @@ func (g *Gateway) HandlerFuncs(nodeRefreshTask func(*APIHeartbeat)) map[string]h
return
}
- // Before actually establishing the connection, our dialer
- // probes the node to see if it's currently the leader
- // (otherwise it tries with another node or retry later).
+ // NOTE: this is kept for backward compatibility when upgrading
+ // a cluster with version <= 4.2.
+ //
+ // Once all nodes are on >= 4.3 this code is effectively
+ // unused.
if r.Method == "HEAD" {
// We can safely know about current leader only if we are a voter.
if g.info.Role != db.RaftVoter {
@@ -383,7 +385,18 @@ func (g *Gateway) DialFunc() client.DialFunc {
return g.memoryDial(ctx, address)
}
- return dqliteNetworkDial(ctx, address, g, true)
+ conn, err := dqliteNetworkDial(ctx, address, g)
+ if err != nil {
+ return nil, err
+ }
+
+ // We successfully established a connection with the leader. Maybe the
+ // leader is ourselves, and we were recently elected. In that case
+ // trigger a full heartbeat now: it will be a no-op if we aren't
+ // actually leaders.
+ go g.heartbeat(g.ctx, true)
+
+ return conn, nil
}
}
@@ -397,7 +410,7 @@ func (g *Gateway) raftDial() client.DialFunc {
}
address = string(addr)
}
- conn, err := dqliteNetworkDial(ctx, address, g, false)
+ conn, err := dqliteNetworkDial(ctx, address, g)
if err != nil {
return nil, err
}
@@ -867,40 +880,13 @@ func (g *Gateway) raftAddress(databaseID uint64) (string, error) {
return address, nil
}
-func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway, checkLeader bool) (net.Conn, error) {
+func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway) (net.Conn, error) {
config, err := tlsClientConfig(g.cert)
if err != nil {
return nil, err
}
path := fmt.Sprintf("https://%s%s", addr, databaseEndpoint)
- if checkLeader {
- // Make a probe HEAD request to check if the target node is the leader.
- request, err := http.NewRequest("HEAD", path, nil)
- if err != nil {
- return nil, err
- }
- setDqliteVersionHeader(request)
- request = request.WithContext(ctx)
- transport, cleanup := tlsTransport(config)
- defer cleanup()
- client := &http.Client{Transport: transport}
- response, err := client.Do(request)
- if err != nil {
- return nil, err
- }
-
- // If the endpoint does not exists, it means that the target node is
- // running version 1 of dqlite protocol. In that case we simply behave
- // as the node was at an older LXD version.
- if response.StatusCode == http.StatusNotFound {
- return nil, db.ErrSomeNodesAreBehind
- }
-
- if response.StatusCode != http.StatusOK {
- return nil, fmt.Errorf(response.Status)
- }
- }
// Establish the connection
request := &http.Request{
@@ -959,14 +945,6 @@ func dqliteNetworkDial(ctx context.Context, addr string, g *Gateway, checkLeader
return nil, fmt.Errorf("Missing or unexpected Upgrade header in response")
}
- // We successfully established a connection with the leader. Maybe the
- // leader is ourselves, and we were recently elected. In that case
- // trigger a full heartbeat now: it will be a no-op if we aren't
- // actually leaders.
- if checkLeader {
- go g.heartbeat(g.ctx, true)
- }
-
return conn, nil
}
More information about the lxc-devel
mailing list