[lxc-devel] [lxd/master] Spread cluster heartbeats

stgraber on Github lxc-bot at linuxcontainers.org
Thu Apr 18 19:33:02 UTC 2019


From e15d7928ce4a240ac9c2eb670f483ef8b4f58d5f Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Thu, 18 Apr 2019 13:55:54 -0400
Subject: [PATCH 1/2] lxd/cluster: Bump heartbeatInterval to 10s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/cluster/heartbeat.go  |  2 +-
 test/suites/clustering.sh | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/lxd/cluster/heartbeat.go b/lxd/cluster/heartbeat.go
index 986505649f..78eacbba71 100644
--- a/lxd/cluster/heartbeat.go
+++ b/lxd/cluster/heartbeat.go
@@ -131,7 +131,7 @@ func Heartbeat(gateway *Gateway, cluster *db.Cluster) (task.Func, task.Schedule)
 }
 
 // Number of seconds to wait between two heartbeat rounds.
-const heartbeatInterval = 4
+const heartbeatInterval = 10
 
 // Perform a single heartbeat request against the node with the given address.
 func heartbeatNode(taskCtx context.Context, address string, cert *shared.CertInfo, raftNodes []db.RaftNode) error {
diff --git a/test/suites/clustering.sh b/test/suites/clustering.sh
index 9dc641f32d..25a5b06206 100644
--- a/test/suites/clustering.sh
+++ b/test/suites/clustering.sh
@@ -55,9 +55,9 @@ test_clustering_membership() {
   spawn_lxd_and_join_cluster "${ns2}" "${bridge}" "${cert}" 2 1 "${LXD_TWO_DIR}"
 
   # Configuration keys can be changed on any node.
-  LXD_DIR="${LXD_TWO_DIR}" lxc config set cluster.offline_threshold 30
-  LXD_DIR="${LXD_ONE_DIR}" lxc info | grep -q 'cluster.offline_threshold: "30"'
-  LXD_DIR="${LXD_TWO_DIR}" lxc info | grep -q 'cluster.offline_threshold: "30"'
+  LXD_DIR="${LXD_TWO_DIR}" lxc config set cluster.offline_threshold 40
+  LXD_DIR="${LXD_ONE_DIR}" lxc info | grep -q 'cluster.offline_threshold: "40"'
+  LXD_DIR="${LXD_TWO_DIR}" lxc info | grep -q 'cluster.offline_threshold: "40"'
 
   # The preseeded network bridge exists on all nodes.
   ns1_pid="$(cat "${TEST_DIR}/ns/${ns1}/PID")"
@@ -115,9 +115,9 @@ test_clustering_membership() {
 
   # Shutdown a database node, and wait a few seconds so it will be
   # detected as down.
-  LXD_DIR="${LXD_ONE_DIR}" lxc config set cluster.offline_threshold 5
+  LXD_DIR="${LXD_ONE_DIR}" lxc config set cluster.offline_threshold 12
   LXD_DIR="${LXD_THREE_DIR}" lxd shutdown
-  sleep 5
+  sleep 30
   LXD_DIR="${LXD_TWO_DIR}" lxc cluster list | grep "node3" | grep -q "OFFLINE"
   LXD_DIR="${LXD_TWO_DIR}" lxc config set cluster.offline_threshold 20
 
@@ -129,7 +129,7 @@ test_clustering_membership() {
 
   # Sleep a bit to let a heartbeat occur and update the list of raft nodes
   # everywhere, showing that node 4 has been promoted to database node.
-  sleep 5
+  sleep 30
   LXD_DIR="${LXD_TWO_DIR}" lxc cluster list | grep "node4" | grep -q "YES"
 
   # Now the preseeded network can be deleted, and all nodes are
@@ -310,9 +310,9 @@ test_clustering_containers() {
 
   # Shutdown node 2, wait for it to be considered offline, and list
   # containers.
-  LXD_DIR="${LXD_THREE_DIR}" lxc config set cluster.offline_threshold 5
+  LXD_DIR="${LXD_THREE_DIR}" lxc config set cluster.offline_threshold 12
   LXD_DIR="${LXD_TWO_DIR}" lxd shutdown
-  sleep 5
+  sleep 30
   LXD_DIR="${LXD_ONE_DIR}" lxc list | grep foo | grep -q ERROR
   LXD_DIR="${LXD_ONE_DIR}" lxc config set cluster.offline_threshold 20
 
@@ -493,9 +493,9 @@ test_clustering_storage() {
     LXD_DIR="${LXD_ONE_DIR}" lxc info bar | grep -q "backup (taken at"
 
     # Shutdown node 3, and wait for it to be considered offline.
-    LXD_DIR="${LXD_THREE_DIR}" lxc config set cluster.offline_threshold 5
+    LXD_DIR="${LXD_THREE_DIR}" lxc config set cluster.offline_threshold 12
     LXD_DIR="${LXD_THREE_DIR}" lxd shutdown
-    sleep 5
+    sleep 30
 
     # Move the container back to node2, even if node3 is offline
     LXD_DIR="${LXD_ONE_DIR}" lxc move bar --target node2
@@ -987,7 +987,7 @@ test_clustering_shutdown_nodes() {
   wait "$(cat three.pid)"
 
   # Make sure the database is not available to the first node
-  sleep 5
+  sleep 30
   LXD_DIR="${LXD_ONE_DIR}" lxd shutdown
 
   # Wait for LXD to terminate, otherwise the db will not be empty, and the
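
A note on the test changes above: with the interval moving from 4s to 10s,
any cluster.offline_threshold at or below 10s could expire between two
consecutive heartbeat rounds and flag healthy nodes as offline. That is why
the tests raise the threshold from 5 to 12 and lengthen the sleeps. A
minimal sketch of the relationship (validateOfflineThreshold and its error
text are illustrative only, not LXD API):

package main

import (
	"fmt"
	"time"
)

// heartbeatInterval mirrors the constant bumped in PATCH 1/2.
const heartbeatInterval = 10 // seconds

// validateOfflineThreshold is a hypothetical check: a threshold that is
// not strictly longer than one heartbeat round can mark a node offline
// even though all of its heartbeats succeed.
func validateOfflineThreshold(threshold time.Duration) error {
	if threshold <= heartbeatInterval*time.Second {
		return fmt.Errorf("threshold %v must exceed the %ds heartbeat interval",
			threshold, heartbeatInterval)
	}
	return nil
}

func main() {
	fmt.Println(validateOfflineThreshold(5 * time.Second))  // rejected: old test value
	fmt.Println(validateOfflineThreshold(12 * time.Second)) // accepted: new test value
}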

From b89414016296683d7a20e1d456f4997b9c64c96a Mon Sep 17 00:00:00 2001
From: Stéphane Graber <stgraber at ubuntu.com>
Date: Thu, 18 Apr 2019 14:11:45 -0400
Subject: [PATCH 2/2] lxd/cluster: Spread heartbeats in time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/cluster/heartbeat.go | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/lxd/cluster/heartbeat.go b/lxd/cluster/heartbeat.go
index 78eacbba71..980f8c7018 100644
--- a/lxd/cluster/heartbeat.go
+++ b/lxd/cluster/heartbeat.go
@@ -4,7 +4,9 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
+	"math/rand"
 	"net/http"
+	"sync"
 	"time"
 
 	"github.com/hashicorp/raft"
@@ -27,13 +29,12 @@ func Heartbeat(gateway *Gateway, cluster *db.Cluster) (task.Func, task.Schedule)
 			// We're not a raft node or we're not clustered
 			return
 		}
-		logger.Debugf("Starting heartbeat round")
 
 		raftNodes, err := gateway.currentRaftNodes()
 		if err == raft.ErrNotLeader {
-			logger.Debugf("Skipping heartbeat since we're not leader")
 			return
 		}
+		logger.Debugf("Starting heartbeat round")
 		if err != nil {
 			logger.Warnf("Failed to get current raft nodes: %v", err)
 			return
@@ -70,22 +71,41 @@ func Heartbeat(gateway *Gateway, cluster *db.Cluster) (task.Func, task.Schedule)
 			logger.Warnf("Failed to get current cluster nodes: %v", err)
 			return
 		}
+
 		heartbeats := make([]time.Time, len(nodes))
+		heartbeatsLock := sync.Mutex{}
+		heartbeatsWg := sync.WaitGroup{}
+
 		for i, node := range nodes {
-			func(i int, address string) {
-				var err error
-				// Only send actual requests to other nodes
-				if address != nodeAddress {
-					err = heartbeatNode(ctx, address, gateway.cert, raftNodes)
-				}
+			// Special case the local node
+			if node.Address == nodeAddress {
+				heartbeatsLock.Lock()
+				heartbeats[i] = time.Now()
+				heartbeatsLock.Unlock()
+				continue
+			}
+
+			// Parallelize the rest
+			heartbeatsWg.Add(1)
+			go func(i int, address string) {
+				defer heartbeatsWg.Done()
+
+				// Spread in time by waiting up to 3s less than the interval
+				time.Sleep(time.Duration(rand.Intn((heartbeatInterval*1000)-3000)) * time.Millisecond)
+				logger.Debugf("Sending heartbeat to %s", address)
+
+				err := heartbeatNode(ctx, address, gateway.cert, raftNodes)
 				if err == nil {
-					logger.Debugf("Successful heartbeat for %s", address)
+					heartbeatsLock.Lock()
 					heartbeats[i] = time.Now()
+					heartbeatsLock.Unlock()
+					logger.Debugf("Successful heartbeat for %s", address)
 				} else {
 					logger.Debugf("Failed heartbeat for %s: %v", address, err)
 				}
 			}(i, node.Address)
 		}
+		heartbeatsWg.Wait()
 
 		// If the context has been cancelled, return immediately.
 		if ctx.Err() != nil {
@@ -98,6 +118,7 @@ func Heartbeat(gateway *Gateway, cluster *db.Cluster) (task.Func, task.Schedule)
 				if heartbeats[i].Equal(time.Time{}) {
 					continue
 				}
+
 				err := tx.NodeHeartbeat(node.Address, heartbeats[i])
 				if err != nil {
 					return err
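
For reference, the spreading pattern from PATCH 2/2 in isolation: each
remote node gets its own goroutine, delayed by a uniform random amount of
up to (heartbeatInterval - 3) seconds. With the new 10s interval,
rand.Intn(10*1000-3000) yields delays in [0ms, 7000ms), so requests land
throughout the round yet the round settles about 3s before the next one
starts. This is a standalone sketch: the addresses and the doHeartbeat
placeholder stand in for LXD's real heartbeatNode call.

package main

import (
	"fmt"
	"math/rand"
	"sync"
	"time"
)

const heartbeatInterval = 10 // seconds

// doHeartbeat is a placeholder for the real per-node HTTP request.
func doHeartbeat(address string) error {
	fmt.Printf("%s heartbeat to %s\n", time.Now().Format("15:04:05.000"), address)
	return nil
}

func main() {
	nodes := []string{"10.0.0.1:8443", "10.0.0.2:8443", "10.0.0.3:8443"}

	lastSeen := make([]time.Time, len(nodes))
	var lock sync.Mutex // guards lastSeen, written from several goroutines
	var wg sync.WaitGroup

	for i, address := range nodes {
		wg.Add(1)
		go func(i int, address string) {
			defer wg.Done()

			// Spread in time: sleep up to 3s less than the interval.
			delay := time.Duration(rand.Intn(heartbeatInterval*1000-3000)) * time.Millisecond
			time.Sleep(delay)

			if err := doHeartbeat(address); err == nil {
				lock.Lock()
				lastSeen[i] = time.Now()
				lock.Unlock()
			}
		}(i, address)
	}
	wg.Wait()
}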

