[lxc-devel] [lxd/master] Failure domains support
freeekanayaka on Github
lxc-bot at linuxcontainers.org
Thu Jul 2 10:41:55 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200702/50f75de5/attachment.bin>
-------------- next part --------------
From 4eece934f4cefaff341d018bf1fedfb1b289e1c8 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 10:00:44 +0200
Subject: [PATCH 01/10] lxd/db: Add failure_domains table and nodes column
reference
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/db/cluster/schema.go | 8 +++++++-
lxd/db/cluster/update.go | 20 ++++++++++++++++++++
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/lxd/db/cluster/schema.go b/lxd/db/cluster/schema.go
index 201ed4e1eb..fb36056864 100644
--- a/lxd/db/cluster/schema.go
+++ b/lxd/db/cluster/schema.go
@@ -20,6 +20,11 @@ CREATE TABLE config (
value TEXT,
UNIQUE (key)
);
+CREATE TABLE failure_domains (
+ id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+ name TEXT NOT NULL,
+ UNIQUE (name)
+);
CREATE TABLE "images" (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
fingerprint TEXT NOT NULL,
@@ -311,6 +316,7 @@ CREATE TABLE nodes (
heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
pending INTEGER NOT NULL DEFAULT 0,
arch INTEGER NOT NULL DEFAULT 0 CHECK (arch > 0),
+ failure_domain_id INTEGER DEFAULT NULL REFERENCES failure_domains (id) ON DELETE SET NULL,
UNIQUE (name),
UNIQUE (address)
);
@@ -565,5 +571,5 @@ CREATE TABLE storage_volumes_snapshots_config (
UNIQUE (storage_volume_snapshot_id, key)
);
-INSERT INTO schema (version, updated_at) VALUES (31, strftime("%s"))
+INSERT INTO schema (version, updated_at) VALUES (32, strftime("%s"))
`
diff --git a/lxd/db/cluster/update.go b/lxd/db/cluster/update.go
index b6d5d4b1fe..758b569a96 100644
--- a/lxd/db/cluster/update.go
+++ b/lxd/db/cluster/update.go
@@ -68,6 +68,26 @@ var updates = map[int]schema.Update{
29: updateFromV28,
30: updateFromV29,
31: updateFromV30,
+ 32: updateFromV31,
+}
+
+// Add failure_domains table and failure_domain_id column to nodes table.
+func updateFromV31(tx *sql.Tx) error {
+ stmts := `
+CREATE TABLE failure_domains (
+ id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+ name TEXT NOT NULL,
+ UNIQUE (name)
+);
+ALTER TABLE nodes
+ ADD COLUMN failure_domain_id INTEGER DEFAULT NULL REFERENCES failure_domains (id) ON DELETE SET NULL;
+`
+ _, err := tx.Exec(stmts)
+ if err != nil {
+ return err
+ }
+
+ return nil
}
// Add content type field to storage volumes
From c945c182b16941212e9de3e3b502ff0e975d78eb Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 10:35:29 +0200
Subject: [PATCH 02/10] lxd/db: Add UpdateNodeFailureDomain() and
GetNodesFailureDomains()
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/db/node.go | 128 +++++++++++++++++++++++++++++++++++++++++++-
lxd/db/node_test.go | 20 +++++++
2 files changed, 147 insertions(+), 1 deletion(-)
diff --git a/lxd/db/node.go b/lxd/db/node.go
index e0a2c30f41..ec4aad41d3 100644
--- a/lxd/db/node.go
+++ b/lxd/db/node.go
@@ -3,6 +3,7 @@
package db
import (
+ "database/sql"
"fmt"
"strconv"
"strings"
@@ -228,7 +229,7 @@ func (c *ClusterTx) RenameNode(old, new string) error {
// Nodes returns all LXD nodes part of the cluster.
func (c *ClusterTx) nodes(pending bool, where string, args ...interface{}) ([]NodeInfo, error) {
// Get node roles
- sql := "SELECT node_id, role FROM nodes_roles;"
+ sql := "SELECT node_id, role FROM nodes_roles"
nodeRoles := map[int64][]string{}
rows, err := c.tx.Query(sql)
@@ -456,6 +457,131 @@ func (c *ClusterTx) UpdateNodeRoles(id int64, roles []ClusterRole) error {
return nil
}
+// UpdateNodeFailureDomain changes the failure domain of a node.
+func (c *ClusterTx) UpdateNodeFailureDomain(id int64, domain string) error {
+ var domainID interface{}
+
+ if domain == "" {
+ domainID = nil
+ } else {
+ row := c.tx.QueryRow("SELECT id FROM failure_domains WHERE name=?", domain)
+ err := row.Scan(&domainID)
+ if err != nil {
+ if err != sql.ErrNoRows {
+ return errors.Wrapf(err, "Load failure domain name")
+ }
+ result, err := c.tx.Exec("INSERT INTO failure_domains (name) VALUES (?)", domain)
+ if err != nil {
+ return errors.Wrapf(err, "Create new failure domain")
+ }
+ domainID, err = result.LastInsertId()
+ if err != nil {
+ return errors.Wrapf(err, "Get last inserted ID")
+ }
+ }
+ }
+
+ result, err := c.tx.Exec("UPDATE nodes SET failure_domain_id=? WHERE id=?", domainID, id)
+ if err != nil {
+ return err
+ }
+ n, err := result.RowsAffected()
+ if err != nil {
+ return err
+ }
+ if n != 1 {
+ return fmt.Errorf("Query updated %d rows instead of 1", n)
+ }
+
+ return nil
+}
+
+// GetNodeFailureDomain returns the failure domain associated with the node with the given ID.
+func (c *ClusterTx) GetNodeFailureDomain(id int64) (string, error) {
+ stmt := `
+SELECT coalesce(failure_domains.name,'')
+ FROM nodes LEFT JOIN failure_domains ON nodes.failure_domain_id = failure_domains.id
+ WHERE nodes.id=?
+`
+ var domain string
+
+ err := c.tx.QueryRow(stmt, id).Scan(&domain)
+ if err != nil {
+ return "", err
+ }
+ return domain, nil
+}
+
+// GetNodesFailureDomains returns a map associating each node address with its
+// failure domain code.
+func (c *ClusterTx) GetNodesFailureDomains() (map[string]uint64, error) {
+ stmt, err := c.tx.Prepare("SELECT address, coalesce(failure_domain_id, 0) FROM nodes")
+ if err != nil {
+ return nil, err
+ }
+
+ rows := []struct {
+ Address string
+ FailureDomainID int64
+ }{}
+
+ dest := func(i int) []interface{} {
+ rows = append(rows, struct {
+ Address string
+ FailureDomainID int64
+ }{})
+ return []interface{}{&rows[len(rows)-1].Address, &rows[len(rows)-1].FailureDomainID}
+ }
+
+ err = query.SelectObjects(stmt, dest)
+ if err != nil {
+ return nil, err
+ }
+
+ domains := map[string]uint64{}
+
+ for _, row := range rows {
+ domains[row.Address] = uint64(row.FailureDomainID)
+ }
+
+ return domains, nil
+}
+
+// GetFailureDomainsNames returns a map associating failure domain IDs to their
+// names.
+func (c *ClusterTx) GetFailureDomainsNames() (map[uint64]string, error) {
+ stmt, err := c.tx.Prepare("SELECT id, name FROM failure_domains")
+ if err != nil {
+ return nil, err
+ }
+
+ rows := []struct {
+ ID int64
+ Name string
+ }{}
+
+ dest := func(i int) []interface{} {
+ rows = append(rows, struct {
+ ID int64
+ Name string
+ }{})
+ return []interface{}{&rows[len(rows)-1].ID, &rows[len(rows)-1].Name}
+ }
+
+ err = query.SelectObjects(stmt, dest)
+ if err != nil {
+ return nil, err
+ }
+
+ domains := map[uint64]string{}
+
+ for _, row := range rows {
+ domains[uint64(row.ID)] = row.Name
+ }
+
+ return domains, nil
+}
+
// RemoveNode removes the node with the given id.
func (c *ClusterTx) RemoveNode(id int64) error {
result, err := c.tx.Exec("DELETE FROM nodes WHERE id=?", id)
diff --git a/lxd/db/node_test.go b/lxd/db/node_test.go
index 99c1c47f34..866053255f 100644
--- a/lxd/db/node_test.go
+++ b/lxd/db/node_test.go
@@ -391,3 +391,23 @@ INSERT INTO instances (id, node_id, name, architecture, type, project_id) VALUES
require.NoError(t, err)
assert.Equal(t, "none", name)
}
+
+func TestUpdateNodeFailureDomain(t *testing.T) {
+ tx, cleanup := db.NewTestClusterTx(t)
+ defer cleanup()
+
+ id, err := tx.CreateNode("buzz", "1.2.3.4:666")
+ require.NoError(t, err)
+
+ assert.NoError(t, tx.UpdateNodeFailureDomain(id, "foo"))
+
+ domains, err := tx.GetNodesFailureDomains()
+ require.NoError(t, err)
+ assert.Equal(t, map[string]uint64{"0.0.0.0": 0, "1.2.3.4:666": 1}, domains)
+
+ assert.NoError(t, tx.UpdateNodeFailureDomain(id, ""))
+
+ domains, err = tx.GetNodesFailureDomains()
+ require.NoError(t, err)
+ assert.Equal(t, map[string]uint64{"0.0.0.0": 0, "1.2.3.4:666": 0}, domains)
+}
From 197a19fc1d6895a839fda8d7b9f36a55bedd12ae Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 10:39:42 +0200
Subject: [PATCH 03/10] lxd/cluster: Honor failure domains when changing roles
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/cluster/membership.go | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index 978d8877d1..135f3417cb 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -824,6 +824,8 @@ func Handover(state *state.State, gateway *Gateway, address string) (string, []d
func newRolesChanges(state *state.State, gateway *Gateway, nodes []db.RaftNode) (*app.RolesChanges, error) {
var maxVoters int
var maxStandBy int
+ var domains map[string]uint64
+
err := state.Cluster.Transaction(func(tx *db.ClusterTx) error {
config, err := ConfigLoad(tx)
if err != nil {
@@ -831,6 +833,12 @@ func newRolesChanges(state *state.State, gateway *Gateway, nodes []db.RaftNode)
}
maxVoters = int(config.MaxVoters())
maxStandBy = int(config.MaxStandBy())
+
+ domains, err = tx.GetNodesFailureDomains()
+ if err != nil {
+ return errors.Wrap(err, "Load failure domains")
+ }
+
return nil
})
if err != nil {
@@ -841,7 +849,9 @@ func newRolesChanges(state *state.State, gateway *Gateway, nodes []db.RaftNode)
for _, node := range nodes {
if HasConnectivity(gateway.cert, node.Address) {
- cluster[node] = &client.NodeMetadata{}
+ cluster[node] = &client.NodeMetadata{
+ FailureDomain: domains[node.Address],
+ }
} else {
cluster[node] = nil
}
From 68e5d4006f61446d56dd4592d524296365520954 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 12:20:58 +0200
Subject: [PATCH 04/10] shared/version: Add clustering_failure_domains
extension
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
doc/api-extensions.md | 5 +++++
shared/version/api.go | 1 +
2 files changed, 6 insertions(+)
diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index 9a34d7871e..ea699c5d7f 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -1093,3 +1093,8 @@ The 5 entities that have UsedBy are:
This adds support for creating and attaching custom block volumes to instances.
It introduces the new `--type` flag when creating custom storage volumes, and accepts the values `fs` and `block`.
+
+## clustering\_failure\_domains
+
+This extension adds a new `failure_domain` field to the `PUT /1.0/cluster/<node>` API,
+which can be used to set the failure domain of a node.
diff --git a/shared/version/api.go b/shared/version/api.go
index 6b5a9909a9..52744904a4 100644
--- a/shared/version/api.go
+++ b/shared/version/api.go
@@ -216,6 +216,7 @@ var APIExtensions = []string{
"network_state_bond_bridge",
"usedby_consistency",
"custom_block_volumes",
+ "clustering_failure_domains",
}
// APIExtensionsCount returns the number of available API extensions.
From 3ea751dde65009cf6576b45eb70db896fa79b544 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 12:21:47 +0200
Subject: [PATCH 05/10] shared/api: Add FailureDomain field to ClusterMemberPut
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
shared/api/cluster.go | 3 +++
1 file changed, 3 insertions(+)
diff --git a/shared/api/cluster.go b/shared/api/cluster.go
index 42808d98c6..96c8d60909 100644
--- a/shared/api/cluster.go
+++ b/shared/api/cluster.go
@@ -75,4 +75,7 @@ type ClusterMemberPut struct {
// API extension: clustering_architecture
Architecture string `json:"architecture" yaml:"architecture"`
+
+ // API extension: clustering_failure_domains
+ FailureDomain string `json:"failure_domain" yaml:"failure_domain"`
}
From cb8b33f7b5a8fdc21757e0a1cdac3a6549c6306c Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 12:22:40 +0200
Subject: [PATCH 06/10] lxd/cluster: Populate FailureDomain field when listing
cluster members
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/cluster/membership.go | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index 135f3417cb..3c475deff3 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -898,16 +898,34 @@ func List(state *state.State, gateway *Gateway) ([]api.ClusterMember, error) {
var err error
var nodes []db.NodeInfo
var offlineThreshold time.Duration
+ domains := map[string]string{}
err = state.Cluster.Transaction(func(tx *db.ClusterTx) error {
nodes, err = tx.GetNodes()
if err != nil {
- return err
+ return errors.Wrap(err, "Load nodes")
}
offlineThreshold, err = tx.GetNodeOfflineThreshold()
if err != nil {
- return err
+ return errors.Wrap(err, "Load offline threshold config")
+ }
+
+ nodesDomains, err := tx.GetNodesFailureDomains()
+ if err != nil {
+ return errors.Wrap(err, "Load nodes failure domains")
+ }
+
+ domainsNames, err := tx.GetFailureDomainsNames()
+ if err != nil {
+ return errors.Wrap(err, "Load failure domains names")
+ }
+
+ for _, node := range nodes {
+ domainID := nodesDomains[node.Address]
+ if domainID != 0 {
+ domains[node.Address] = domainsNames[domainID]
+ }
}
return nil
@@ -956,6 +974,7 @@ func List(state *state.State, gateway *Gateway) ([]api.ClusterMember, error) {
if err != nil {
return nil, err
}
+ result[i].FailureDomain = domains[node.Address]
if node.IsOffline(offlineThreshold) {
result[i].Status = "Offline"
From f44648765fb1271b3dc9955919473cf15af68e60 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 12:23:22 +0200
Subject: [PATCH 07/10] lxd: Support changing failure domain in PUT
/1.0/cluster/<node>
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
client/lxd_cluster.go | 5 +++++
lxd/api_cluster.go | 21 ++++++++++++++++++---
2 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/client/lxd_cluster.go b/client/lxd_cluster.go
index 7922ce1149..f67940b64f 100644
--- a/client/lxd_cluster.go
+++ b/client/lxd_cluster.go
@@ -113,6 +113,11 @@ func (r *ProtocolLXD) UpdateClusterMember(name string, member api.ClusterMemberP
if !r.HasExtension("clustering_edit_roles") {
return fmt.Errorf("The server is missing the required \"clustering_edit_roles\" API extension")
}
+ if member.FailureDomain != "" {
+ if !r.HasExtension("clustering_failure_domains") {
+ return fmt.Errorf("The server is missing the required \"clustering_failure_domains\" API extension")
+ }
+ }
// Send the request
_, _, err := r.query("PUT", fmt.Sprintf("/cluster/members/%s", name), member, ETag)
diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go
index 4632cb8dbc..d8cfb0a74c 100644
--- a/lxd/api_cluster.go
+++ b/lxd/api_cluster.go
@@ -887,11 +887,17 @@ func clusterNodePut(d *Daemon, r *http.Request) response.Response {
// Find the requested one.
var current db.NodeInfo
+ var currentFailureDomain string
var err error
err = d.cluster.Transaction(func(tx *db.ClusterTx) error {
current, err = tx.GetNodeByName(name)
if err != nil {
- return err
+ return errors.Wrap(err, "Load current node state")
+ }
+
+ currentFailureDomain, err = tx.GetNodeFailureDomain(current.ID)
+ if err != nil {
+ return errors.Wrap(err, "Load current failure domain")
}
return nil
@@ -901,7 +907,11 @@ func clusterNodePut(d *Daemon, r *http.Request) response.Response {
}
// Validate the request is fine
- err = util.EtagCheck(r, current.Roles)
+ etag := []interface{}{
+ current.Roles,
+ currentFailureDomain,
+ }
+ err = util.EtagCheck(r, etag)
if err != nil {
return response.PreconditionFailed(err)
}
@@ -932,7 +942,12 @@ func clusterNodePut(d *Daemon, r *http.Request) response.Response {
err := tx.UpdateNodeRoles(current.ID, dbRoles)
if err != nil {
- return err
+ return errors.Wrap(err, "Update roles")
+ }
+
+ err = tx.UpdateNodeFailureDomain(current.ID, req.FailureDomain)
+ if err != nil {
+ return errors.Wrap(err, "Update failure domain")
}
return nil
From ebd2459a5b8244546ef597496288b62b5efe0d5f Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 12:23:49 +0200
Subject: [PATCH 08/10] test: Add new clustering_failure_domains test case
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
test/main.sh | 1 +
test/suites/clustering.sh | 91 +++++++++++++++++++++++++++++++++++++++
2 files changed, 92 insertions(+)
diff --git a/test/main.sh b/test/main.sh
index f51a0ee0eb..364b203af2 100755
--- a/test/main.sh
+++ b/test/main.sh
@@ -182,6 +182,7 @@ run_test test_clustering_recover "clustering recovery"
run_test test_clustering_handover "clustering handover"
run_test test_clustering_rebalance "clustering rebalance"
run_test test_clustering_remove_raft_node "clustering remove raft node"
+run_test test_clustering_failure_domains "failure domains"
# run_test test_clustering_upgrade "clustering upgrade"
run_test test_projects_default "default project"
run_test test_projects_crud "projects CRUD operations"
diff --git a/test/suites/clustering.sh b/test/suites/clustering.sh
index 49631e8d8f..240eee66d9 100644
--- a/test/suites/clustering.sh
+++ b/test/suites/clustering.sh
@@ -1901,3 +1901,94 @@ test_clustering_remove_raft_node() {
kill_lxd "${LXD_THREE_DIR}"
kill_lxd "${LXD_FOUR_DIR}"
}
+
+test_clustering_failure_domains() {
+ # shellcheck disable=2039
+ local LXD_DIR
+
+ setup_clustering_bridge
+ prefix="lxd$$"
+ bridge="${prefix}"
+
+ setup_clustering_netns 1
+ LXD_ONE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+ chmod +x "${LXD_ONE_DIR}"
+ ns1="${prefix}1"
+ spawn_lxd_and_bootstrap_cluster "${ns1}" "${bridge}" "${LXD_ONE_DIR}"
+
+ # Add a newline at the end of each line. YAML has weird rules.
+ cert=$(sed ':a;N;$!ba;s/\n/\n\n/g' "${LXD_ONE_DIR}/server.crt")
+
+ # Spawn a second node
+ setup_clustering_netns 2
+ LXD_TWO_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+ chmod +x "${LXD_TWO_DIR}"
+ ns2="${prefix}2"
+ spawn_lxd_and_join_cluster "${ns2}" "${bridge}" "${cert}" 2 1 "${LXD_TWO_DIR}"
+
+ # Spawn a third node, using the non-leader node2 as join target.
+ setup_clustering_netns 3
+ LXD_THREE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+ chmod +x "${LXD_THREE_DIR}"
+ ns3="${prefix}3"
+ spawn_lxd_and_join_cluster "${ns3}" "${bridge}" "${cert}" 3 2 "${LXD_THREE_DIR}"
+
+ # Spawn a fourth node, this will be a non-database node.
+ setup_clustering_netns 4
+ LXD_FOUR_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+ chmod +x "${LXD_FOUR_DIR}"
+ ns4="${prefix}4"
+ spawn_lxd_and_join_cluster "${ns4}" "${bridge}" "${cert}" 4 1 "${LXD_FOUR_DIR}"
+
+ # Spawn a fifth node, using non-database node4 as join target.
+ setup_clustering_netns 5
+ LXD_FIVE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+ chmod +x "${LXD_FIVE_DIR}"
+ ns5="${prefix}5"
+ spawn_lxd_and_join_cluster "${ns5}" "${bridge}" "${cert}" 5 4 "${LXD_FIVE_DIR}"
+
+ # Spawn a sixth node, using non-database node4 as join target.
+ setup_clustering_netns 6
+ LXD_SIX_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+ chmod +x "${LXD_SIX_DIR}"
+ ns6="${prefix}6"
+ spawn_lxd_and_join_cluster "${ns6}" "${bridge}" "${cert}" 6 4 "${LXD_SIX_DIR}"
+
+ # Set failure domains
+ echo -e "roles: [\"database\"]\nfailure_domain: \"az1\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node1
+ echo -e "roles: [\"database\"]\nfailure_domain: \"az2\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node2
+ echo -e "roles: [\"database\"]\nfailure_domain: \"az3\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node3
+ echo -e "roles: []\nfailure_domain: \"az1\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node4
+ echo -e "roles: []\nfailure_domain: \"az2\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node5
+ echo -e "roles: []\nfailure_domain: \"az3\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node6
+
+ # Shutdown a node in az2, its replacement is picked from az2.
+ LXD_DIR="${LXD_TWO_DIR}" lxd shutdown
+ sleep 3
+
+ LXD_DIR="${LXD_ONE_DIR}" lxc cluster show node2 | grep -q "database: false"
+ LXD_DIR="${LXD_ONE_DIR}" lxc cluster show node5 | grep -q "database: true"
+
+ LXD_DIR="${LXD_SIX_DIR}" lxd shutdown
+ LXD_DIR="${LXD_FIVE_DIR}" lxd shutdown
+ LXD_DIR="${LXD_FOUR_DIR}" lxd shutdown
+ LXD_DIR="${LXD_THREE_DIR}" lxd shutdown
+ LXD_DIR="${LXD_ONE_DIR}" lxd shutdown
+ sleep 0.5
+ rm -f "${LXD_SIX_DIR}/unix.socket"
+ rm -f "${LXD_FIVE_DIR}/unix.socket"
+ rm -f "${LXD_FOUR_DIR}/unix.socket"
+ rm -f "${LXD_THREE_DIR}/unix.socket"
+ rm -f "${LXD_TWO_DIR}/unix.socket"
+ rm -f "${LXD_ONE_DIR}/unix.socket"
+
+ teardown_clustering_netns
+ teardown_clustering_bridge
+
+ kill_lxd "${LXD_ONE_DIR}"
+ kill_lxd "${LXD_TWO_DIR}"
+ kill_lxd "${LXD_THREE_DIR}"
+ kill_lxd "${LXD_FOUR_DIR}"
+ kill_lxd "${LXD_FIVE_DIR}"
+ kill_lxd "${LXD_SIX_DIR}"
+}
From a0120ab3234bd80280356e8c6487df8b8dc026d1 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 12:27:34 +0200
Subject: [PATCH 09/10] doc: Add documentation about failure domains
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
doc/clustering.md | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/doc/clustering.md b/doc/clustering.md
index cb90f4ed55..e7137350a1 100644
--- a/doc/clustering.md
+++ b/doc/clustering.md
@@ -217,6 +217,17 @@ transition to the Blocked state, until you upgrade the very last
one. At that point the blocked nodes will notice that there is no
out-of-date node left and will become operational again.
+### Failure domains
+
+Failure domains can be used to indicate which nodes should be given preference
+when trying to assign roles to a cluster member that has been shut down or has
+crashed. For example, if a cluster member that currently has the database role
+gets shut down, LXD will try to assign its database role to another cluster
+member in the same failure domain, if one is available.
+
+To change the failure domain of a cluster member you can use the `lxc cluster
+edit <member>` command line tool, or the `PUT /1.0/cluster/<member>` REST API.
+
### Recover from quorum loss
Every LXD cluster has up to 3 members that serve as database nodes. If you
From b6d795a3555960e8d2871480b72485dbfa3ffa33 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 2 Jul 2020 12:33:42 +0200
Subject: [PATCH 10/10] lxc: Add failure domain column in "lxc cluster list"
output
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxc/cluster.go | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lxc/cluster.go b/lxc/cluster.go
index eacd4ae36f..9d605990ce 100644
--- a/lxc/cluster.go
+++ b/lxc/cluster.go
@@ -123,7 +123,7 @@ func (c *cmdClusterList) Run(cmd *cobra.Command, args []string) error {
if member.Database {
database = "YES"
}
- line := []string{member.ServerName, member.URL, database, strings.ToUpper(member.Status), member.Message, member.Architecture}
+ line := []string{member.ServerName, member.URL, database, strings.ToUpper(member.Status), member.Message, member.Architecture, member.FailureDomain}
data = append(data, line)
}
sort.Sort(byName(data))
@@ -135,6 +135,7 @@ func (c *cmdClusterList) Run(cmd *cobra.Command, args []string) error {
i18n.G("STATE"),
i18n.G("MESSAGE"),
i18n.G("ARCHITECTURE"),
+ i18n.G("FAILURE DOMAIN"),
}
return utils.RenderTable(c.flagFormat, header, data, members)
More information about the lxc-devel
mailing list