[lxc-devel] [lxd/master] Configurable number of voting and stand-by members
freeekanayaka on Github
lxc-bot at linuxcontainers.org
Tue Feb 11 10:22:55 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200211/cd7d123d/attachment.bin>
-------------- next part --------------
From 90008601ba25788e190a7f6564e73915acf48e34 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Tue, 11 Feb 2020 09:34:42 +0000
Subject: [PATCH 1/2] Add cluster.max_voters and cluster.max_standby configuration
keys
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/cluster/config.go | 40 ++++++++++++++++++++++++++++++++++++++
lxd/cluster/config_test.go | 13 +++++++++++++
2 files changed, 53 insertions(+)
diff --git a/lxd/cluster/config.go b/lxd/cluster/config.go
index 2a7381664b..6791a20ae6 100644
--- a/lxd/cluster/config.go
+++ b/lxd/cluster/config.go
@@ -130,6 +130,18 @@ func (c *Config) ImagesMinimalReplica() int64 {
return c.m.GetInt64("cluster.images_minimal_replica")
}
+// MaxVoters returns the maximum number of members in a cluster that will be
+// assigned the voter role.
+func (c *Config) MaxVoters() int64 {
+ return c.m.GetInt64("cluster.max_voters")
+}
+
+// MaxStandBy returns the maximum number of members in a cluster that will be
+// assigned the stand-by role.
+func (c *Config) MaxStandBy() int64 {
+ return c.m.GetInt64("cluster.max_standby")
+}
+
// Dump current configuration keys and their values. Keys with values matching
// their defaults are omitted.
func (c *Config) Dump() map[string]interface{} {
@@ -222,6 +234,8 @@ var ConfigSchema = config.Schema{
"backups.compression_algorithm": {Default: "gzip", Validator: validateCompression},
"cluster.offline_threshold": {Type: config.Int64, Default: offlineThresholdDefault(), Validator: offlineThresholdValidator},
"cluster.images_minimal_replica": {Type: config.Int64, Default: "3", Validator: imageMinimalReplicaValidator},
+ "cluster.max_voters": {Type: config.Int64, Default: "3", Validator: maxVotersValidator},
+ "cluster.max_standby": {Type: config.Int64, Default: "2", Validator: maxStandByValidator},
"core.https_allowed_headers": {},
"core.https_allowed_methods": {},
"core.https_allowed_origin": {},
@@ -292,6 +306,32 @@ func imageMinimalReplicaValidator(value string) error {
return nil
}
+func maxVotersValidator(value string) error {
+ n, err := strconv.Atoi(value)
+ if err != nil {
+ return fmt.Errorf("Value is not a number")
+ }
+
+ if n < 3 || n%2 != 1 {
+ return fmt.Errorf("Value must be an odd number equal to or higher than 3")
+ }
+
+ return nil
+}
+
+func maxStandByValidator(value string) error {
+ n, err := strconv.Atoi(value)
+ if err != nil {
+ return fmt.Errorf("Value is not a number")
+ }
+
+ if n < 0 || n > 5 {
+ return fmt.Errorf("Value must be between 0 and 5")
+ }
+
+ return nil
+}
+
func passwordSetter(value string) (string, error) {
// Nothing to do on unset
if value == "" {
diff --git a/lxd/cluster/config_test.go b/lxd/cluster/config_test.go
index 589e7527e1..4b2ab5e42a 100644
--- a/lxd/cluster/config_test.go
+++ b/lxd/cluster/config_test.go
@@ -64,6 +64,19 @@ func TestConfigLoad_OfflineThresholdValidator(t *testing.T) {
}
+// Max number of voters must be odd.
+func TestConfigLoad_MaxVotersValidator(t *testing.T) {
+ tx, cleanup := db.NewTestClusterTx(t)
+ defer cleanup()
+
+ config, err := cluster.ConfigLoad(tx)
+ require.NoError(t, err)
+
+ _, err = config.Patch(map[string]interface{}{"cluster.max_voters": "4"})
+ require.EqualError(t, err, "cannot set 'cluster.max_voters' to '4': Value must be an odd number equal to or higher than 3")
+
+}
+
// If some previously set values are missing from the ones passed to Replace(),
// they are deleted from the configuration.
func TestConfig_ReplaceDeleteValues(t *testing.T) {
From 1dfa048a28dbaa86669a5e389a5f3af49725241c Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Tue, 11 Feb 2020 09:59:52 +0000
Subject: [PATCH 2/2] Load configuration keys when checking for n of
voters/stand-by
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/cluster/membership.go | 30 +++++++++++++++++++-----------
lxd/daemon.go | 18 +++++++++++++++++-
2 files changed, 36 insertions(+), 12 deletions(-)
diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index 554e68d8ad..63477ab581 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -156,6 +156,9 @@ func Bootstrap(state *state.State, gateway *Gateway, name string) error {
// Return an updated list raft database nodes (possibly including the newly
// accepted node).
func Accept(state *state.State, gateway *Gateway, name, address string, schema, api, arch int) ([]db.RaftNode, error) {
+ var maxVoters int64
+ var maxStandBy int64
+
// Check parameters
if name == "" {
return nil, fmt.Errorf("node name must not be empty")
@@ -167,8 +170,15 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
// Insert the new node into the nodes table.
var id int64
err := state.Cluster.Transaction(func(tx *db.ClusterTx) error {
+ config, err := ConfigLoad(tx)
+ if err != nil {
+ return errors.Wrap(err, "Load cluster configuration")
+ }
+ maxVoters = config.MaxVoters()
+ maxStandBy = config.MaxStandBy()
+
// Check that the node can be accepted with these parameters.
- err := membershipCheckClusterStateForAccept(tx, name, address, schema, api)
+ err = membershipCheckClusterStateForAccept(tx, name, address, schema, api)
if err != nil {
return err
}
@@ -211,9 +221,9 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
}
}
node := db.RaftNode{ID: uint64(id), Address: address, Role: db.RaftSpare}
- if count > 1 && voters < MaxVoters {
+ if count > 1 && voters < int(maxVoters) {
node.Role = db.RaftVoter
- } else if standbys < MaxStandBys {
+ } else if standbys < int(maxStandBy) {
node.Role = db.RaftStandBy
}
nodes = append(nodes, node)
@@ -490,12 +500,16 @@ func Rebalance(state *state.State, gateway *Gateway) (string, []db.RaftNode, err
// timestamp and check whether they are offline.
nodesByAddress := map[string]db.NodeInfo{}
var offlineThreshold time.Duration
+ var maxVoters int64
+ var maxStandBy int64
err = state.Cluster.Transaction(func(tx *db.ClusterTx) error {
config, err := ConfigLoad(tx)
if err != nil {
return errors.Wrap(err, "failed load cluster configuration")
}
offlineThreshold = config.OfflineThreshold()
+ maxVoters = config.MaxVoters()
+ maxStandBy = config.MaxStandBy()
nodes, err := tx.Nodes()
if err != nil {
return errors.Wrap(err, "failed to get cluster nodes")
@@ -559,12 +573,12 @@ func Rebalance(state *state.State, gateway *Gateway) (string, []db.RaftNode, err
var role db.RaftRole
- if len(voters) < MaxVoters && len(voters) > 1 {
+ if len(voters) < int(maxVoters) && len(voters) > 1 {
role = db.RaftVoter
// Include stand-by nodes among the ones that can be promoted,
// preferring them over spare ones.
candidates = append(standbys, candidates...)
- } else if len(standbys) < MaxStandBys {
+ } else if len(standbys) < int(maxStandBy) {
role = db.RaftStandBy
} else {
// We're already at full capacity or would have a two-member cluster.
@@ -1123,9 +1137,3 @@ func membershipCheckNoLeftoverClusterCert(dir string) error {
// SchemaVersion holds the version of the cluster database schema.
var SchemaVersion = cluster.SchemaVersion
-
-// We currently aim at having 3 voter nodes and 2 stand-by.
-//
-// TODO: these numbers should probably be configurable.
-const MaxVoters = 3
-const MaxStandBys = 2
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 6bdb4958b7..b6da9e7005 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -1478,7 +1478,23 @@ func (d *Daemon) NodeRefreshTask(heartbeatData *cluster.APIHeartbeat) {
// don't have enough voters or standbys, let's see if we can upgrade
// some member.
if len(heartbeatData.Members) > 2 {
- if isDegraded || voters < cluster.MaxVoters || standbys < cluster.MaxStandBys {
+ var maxVoters int64
+ var maxStandBy int64
+ err := d.cluster.Transaction(func(tx *db.ClusterTx) error {
+ config, err := cluster.ConfigLoad(tx)
+ if err != nil {
+ return err
+ }
+ maxVoters = config.MaxVoters()
+ maxStandBy = config.MaxStandBy()
+ return nil
+ })
+ if err != nil {
+ logger.Errorf("Error loading cluster configuration: %v", err)
+ return
+ }
+
+ if isDegraded || voters < int(maxVoters) || standbys < int(maxStandBy) {
go func() {
d.clusterMembershipMutex.Lock()
defer d.clusterMembershipMutex.Unlock()
More information about the lxc-devel
mailing list