[lxc-devel] [lxd/master] Configurable number of voting and stand-by members

freeekanayaka on Github lxc-bot at linuxcontainers.org
Tue Feb 11 10:22:55 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200211/cd7d123d/attachment.bin>
-------------- next part --------------
From 90008601ba25788e190a7f6564e73915acf48e34 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Tue, 11 Feb 2020 09:34:42 +0000
Subject: [PATCH 1/2] Add cluster.max_voters and cluster.max_standby configuration
 keys

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/cluster/config.go      | 40 ++++++++++++++++++++++++++++++++++++++
 lxd/cluster/config_test.go | 13 +++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/lxd/cluster/config.go b/lxd/cluster/config.go
index 2a7381664b..6791a20ae6 100644
--- a/lxd/cluster/config.go
+++ b/lxd/cluster/config.go
@@ -130,6 +130,18 @@ func (c *Config) ImagesMinimalReplica() int64 {
 	return c.m.GetInt64("cluster.images_minimal_replica")
 }
 
+// MaxVoters returns the maximum number of members in a cluster that will be
+// assigned the voter role.
+func (c *Config) MaxVoters() int64 {
+	return c.m.GetInt64("cluster.max_voters")
+}
+
+// MaxStandBy returns the maximum number of members in a cluster that will be
+// assigned the stand-by role.
+func (c *Config) MaxStandBy() int64 {
+	return c.m.GetInt64("cluster.max_standby")
+}
+
 // Dump current configuration keys and their values. Keys with values matching
 // their defaults are omitted.
 func (c *Config) Dump() map[string]interface{} {
@@ -222,6 +234,8 @@ var ConfigSchema = config.Schema{
 	"backups.compression_algorithm":  {Default: "gzip", Validator: validateCompression},
 	"cluster.offline_threshold":      {Type: config.Int64, Default: offlineThresholdDefault(), Validator: offlineThresholdValidator},
 	"cluster.images_minimal_replica": {Type: config.Int64, Default: "3", Validator: imageMinimalReplicaValidator},
+	"cluster.max_voters":             {Type: config.Int64, Default: "3", Validator: maxVotersValidator},
+	"cluster.max_standby":            {Type: config.Int64, Default: "2", Validator: maxStandByValidator},
 	"core.https_allowed_headers":     {},
 	"core.https_allowed_methods":     {},
 	"core.https_allowed_origin":      {},
@@ -292,6 +306,32 @@ func imageMinimalReplicaValidator(value string) error {
 	return nil
 }
 
+func maxVotersValidator(value string) error {
+	n, err := strconv.Atoi(value)
+	if err != nil {
+		return fmt.Errorf("Value is not a number")
+	}
+
+	if n < 3 || n%2 != 1 {
+		return fmt.Errorf("Value must be an odd number equal to or higher than 3")
+	}
+
+	return nil
+}
+
+func maxStandByValidator(value string) error {
+	n, err := strconv.Atoi(value)
+	if err != nil {
+		return fmt.Errorf("Value is not a number")
+	}
+
+	if n < 0 || n > 5 {
+		return fmt.Errorf("Value must be between 0 and 5")
+	}
+
+	return nil
+}
+
 func passwordSetter(value string) (string, error) {
 	// Nothing to do on unset
 	if value == "" {
diff --git a/lxd/cluster/config_test.go b/lxd/cluster/config_test.go
index 589e7527e1..4b2ab5e42a 100644
--- a/lxd/cluster/config_test.go
+++ b/lxd/cluster/config_test.go
@@ -64,6 +64,19 @@ func TestConfigLoad_OfflineThresholdValidator(t *testing.T) {
 
 }
 
+// Max number of voters must be odd.
+func TestConfigLoad_MaxVotersValidator(t *testing.T) {
+	tx, cleanup := db.NewTestClusterTx(t)
+	defer cleanup()
+
+	config, err := cluster.ConfigLoad(tx)
+	require.NoError(t, err)
+
+	_, err = config.Patch(map[string]interface{}{"cluster.max_voters": "4"})
+	require.EqualError(t, err, "cannot set 'cluster.max_voters' to '4': Value must be an odd number equal to or higher than 3")
+
+}
+
 // If some previously set values are missing from the ones passed to Replace(),
 // they are deleted from the configuration.
 func TestConfig_ReplaceDeleteValues(t *testing.T) {

From 1dfa048a28dbaa86669a5e389a5f3af49725241c Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Tue, 11 Feb 2020 09:59:52 +0000
Subject: [PATCH 2/2] Load configuration keys when checking the number of
 voters/stand-bys

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/cluster/membership.go | 30 +++++++++++++++++++-----------
 lxd/daemon.go             | 18 +++++++++++++++++-
 2 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index 554e68d8ad..63477ab581 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -156,6 +156,9 @@ func Bootstrap(state *state.State, gateway *Gateway, name string) error {
 // Return an updated list raft database nodes (possibly including the newly
 // accepted node).
 func Accept(state *state.State, gateway *Gateway, name, address string, schema, api, arch int) ([]db.RaftNode, error) {
+	var maxVoters int64
+	var maxStandBy int64
+
 	// Check parameters
 	if name == "" {
 		return nil, fmt.Errorf("node name must not be empty")
@@ -167,8 +170,15 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
 	// Insert the new node into the nodes table.
 	var id int64
 	err := state.Cluster.Transaction(func(tx *db.ClusterTx) error {
+		config, err := ConfigLoad(tx)
+		if err != nil {
+			return errors.Wrap(err, "Load cluster configuration")
+		}
+		maxVoters = config.MaxVoters()
+		maxStandBy = config.MaxStandBy()
+
 		// Check that the node can be accepted with these parameters.
-		err := membershipCheckClusterStateForAccept(tx, name, address, schema, api)
+		err = membershipCheckClusterStateForAccept(tx, name, address, schema, api)
 		if err != nil {
 			return err
 		}
@@ -211,9 +221,9 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
 		}
 	}
 	node := db.RaftNode{ID: uint64(id), Address: address, Role: db.RaftSpare}
-	if count > 1 && voters < MaxVoters {
+	if count > 1 && voters < int(maxVoters) {
 		node.Role = db.RaftVoter
-	} else if standbys < MaxStandBys {
+	} else if standbys < int(maxStandBy) {
 		node.Role = db.RaftStandBy
 	}
 	nodes = append(nodes, node)
@@ -490,12 +500,16 @@ func Rebalance(state *state.State, gateway *Gateway) (string, []db.RaftNode, err
 	// timestamp and check whether they are offline.
 	nodesByAddress := map[string]db.NodeInfo{}
 	var offlineThreshold time.Duration
+	var maxVoters int64
+	var maxStandBy int64
 	err = state.Cluster.Transaction(func(tx *db.ClusterTx) error {
 		config, err := ConfigLoad(tx)
 		if err != nil {
 			return errors.Wrap(err, "failed load cluster configuration")
 		}
 		offlineThreshold = config.OfflineThreshold()
+		maxVoters = config.MaxVoters()
+		maxStandBy = config.MaxStandBy()
 		nodes, err := tx.Nodes()
 		if err != nil {
 			return errors.Wrap(err, "failed to get cluster nodes")
@@ -559,12 +573,12 @@ func Rebalance(state *state.State, gateway *Gateway) (string, []db.RaftNode, err
 
 	var role db.RaftRole
 
-	if len(voters) < MaxVoters && len(voters) > 1 {
+	if len(voters) < int(maxVoters) && len(voters) > 1 {
 		role = db.RaftVoter
 		// Include stand-by nodes among the ones that can be promoted,
 		// preferring them over spare ones.
 		candidates = append(standbys, candidates...)
-	} else if len(standbys) < MaxStandBys {
+	} else if len(standbys) < int(maxStandBy) {
 		role = db.RaftStandBy
 	} else {
 		// We're already at full capacity or would have a two-member cluster.
@@ -1123,9 +1137,3 @@ func membershipCheckNoLeftoverClusterCert(dir string) error {
 
 // SchemaVersion holds the version of the cluster database schema.
 var SchemaVersion = cluster.SchemaVersion
-
-// We currently aim at having 3 voter nodes and 2 stand-by.
-//
-// TODO: these numbers should probably be configurable.
-const MaxVoters = 3
-const MaxStandBys = 2
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 6bdb4958b7..b6da9e7005 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -1478,7 +1478,23 @@ func (d *Daemon) NodeRefreshTask(heartbeatData *cluster.APIHeartbeat) {
 	// don't have enough voters or standbys, let's see if we can upgrade
 	// some member.
 	if len(heartbeatData.Members) > 2 {
-		if isDegraded || voters < cluster.MaxVoters || standbys < cluster.MaxStandBys {
+		var maxVoters int64
+		var maxStandBy int64
+		err := d.cluster.Transaction(func(tx *db.ClusterTx) error {
+			config, err := cluster.ConfigLoad(tx)
+			if err != nil {
+				return err
+			}
+			maxVoters = config.MaxVoters()
+			maxStandBy = config.MaxStandBy()
+			return nil
+		})
+		if err != nil {
+			logger.Errorf("Error loading cluster configuration: %v", err)
+			return
+		}
+
+		if isDegraded || voters < int(maxVoters) || standbys < int(maxStandBy) {
 			go func() {
 				d.clusterMembershipMutex.Lock()
 				defer d.clusterMembershipMutex.Unlock()


More information about the lxc-devel mailing list