[lxc-devel] [lxd/master] [DRAFT] Multi architecture clustering

DBaum1 on GitHub lxc-bot at linuxcontainers.org
Thu Dec 5 07:54:56 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 429 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20191204/4768ac00/attachment.bin>
-------------- next part --------------
From 0c46b5ad924f82328eae1066671eb08f0853354c Mon Sep 17 00:00:00 2001
From: Dinah Baum <dinahbaum123 at gmail.com>
Date: Thu, 5 Dec 2019 01:33:20 -0600
Subject: [PATCH 1/7] lxc: Add ARCH column and state information to cluster
 list

Signed-off-by: Dinah Baum <dinahbaum123 at gmail.com>
---
 lxc/cluster.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lxc/cluster.go b/lxc/cluster.go
index c147e54155..6231de73d6 100644
--- a/lxc/cluster.go
+++ b/lxc/cluster.go
@@ -117,7 +117,7 @@ func (c *cmdClusterList) Run(cmd *cobra.Command, args []string) error {
 		if member.Database {
 			database = "YES"
 		}
-		line := []string{member.ServerName, member.URL, database, strings.ToUpper(member.Status), member.Message}
+		line := []string{member.ServerName, member.URL, database, strings.ToUpper(member.Status), member.Message, member.Arch}
 		data = append(data, line)
 	}
 	sort.Sort(byName(data))
@@ -128,6 +128,7 @@ func (c *cmdClusterList) Run(cmd *cobra.Command, args []string) error {
 		i18n.G("DATABASE"),
 		i18n.G("STATE"),
 		i18n.G("MESSAGE"),
+		i18n.G("ARCH"),
 	}
 
 	return utils.RenderTable(c.flagFormat, header, data, members)

From dea57e3eca26d1bcf8b9509841b7ec4c850673c8 Mon Sep 17 00:00:00 2001
From: Dinah Baum <dinahbaum123 at gmail.com>
Date: Thu, 5 Dec 2019 01:34:09 -0600
Subject: [PATCH 2/7] lxd/cluster: fix cluster.Accept() calls

Signed-off-by: Dinah Baum <dinahbaum123 at gmail.com>
---
 lxd/cluster/heartbeat_test.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lxd/cluster/heartbeat_test.go b/lxd/cluster/heartbeat_test.go
index 4b4e247dc0..30fddb3371 100644
--- a/lxd/cluster/heartbeat_test.go
+++ b/lxd/cluster/heartbeat_test.go
@@ -142,9 +142,8 @@ func (f *heartbeatFixture) Grow() *cluster.Gateway {
 
 	state, gateway, address := f.node()
 	name := address
-
 	nodes, err := cluster.Accept(
-		targetState, target, name, address, cluster.SchemaVersion, len(version.APIExtensions))
+		targetState, target, name, address, cluster.SchemaVersion, len(version.APIExtensions), 1)
 	require.NoError(f.t, err)
 
 	err = cluster.Join(state, gateway, target.Cert(), name, nodes)

From 92461624e87fe1dab890276a8015814209c69bec Mon Sep 17 00:00:00 2001
From: Dinah Baum <dinahbaum123 at gmail.com>
Date: Thu, 5 Dec 2019 01:34:26 -0600
Subject: [PATCH 3/7] lxd/cluster: fix cluster.Accept() calls

Signed-off-by: Dinah Baum <dinahbaum123 at gmail.com>
---
 lxd/cluster/membership_test.go | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/lxd/cluster/membership_test.go b/lxd/cluster/membership_test.go
index 5462f7fc3e..bc9c5bb3aa 100644
--- a/lxd/cluster/membership_test.go
+++ b/lxd/cluster/membership_test.go
@@ -203,8 +203,7 @@ func TestAccept_UnmetPreconditions(t *testing.T) {
 			defer gateway.Shutdown()
 
 			c.setup(&membershipFixtures{t: t, state: state})
-
-			_, err := cluster.Accept(state, gateway, c.name, c.address, c.schema, c.api)
+			_, err := cluster.Accept(state, gateway, c.name, c.address, c.schema, c.api, 1)
 			assert.EqualError(t, err, c.error)
 		})
 	}
@@ -222,9 +221,8 @@ func TestAccept(t *testing.T) {
 	f := &membershipFixtures{t: t, state: state}
 	f.RaftNode("1.2.3.4:666")
 	f.ClusterNode("1.2.3.4:666")
-
 	nodes, err := cluster.Accept(
-		state, gateway, "buzz", "5.6.7.8:666", cluster.SchemaVersion, len(version.APIExtensions))
+		state, gateway, "buzz", "5.6.7.8:666", cluster.SchemaVersion, len(version.APIExtensions), 1)
 	assert.NoError(t, err)
 	assert.Len(t, nodes, 2)
 	assert.Equal(t, int64(1), nodes[0].ID)
@@ -306,7 +304,7 @@ func TestJoin(t *testing.T) {
 
 	// Accept the joining node.
 	raftNodes, err := cluster.Accept(
-		targetState, targetGateway, "rusp", address, cluster.SchemaVersion, len(version.APIExtensions))
+		targetState, targetGateway, "rusp", address, cluster.SchemaVersion, len(version.APIExtensions), 1)
 	require.NoError(t, err)
 
 	// Actually join the cluster.

From 728844f6cb6e7062e819bbcbea14e357a15d5aef Mon Sep 17 00:00:00 2001
From: Dinah Baum <dinahbaum123 at gmail.com>
Date: Thu, 5 Dec 2019 01:34:54 -0600
Subject: [PATCH 4/7] lxd/cluster: Add Arch

Signed-off-by: Dinah Baum <dinahbaum123 at gmail.com>
---
 lxd/cluster/membership.go | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index b572400553..b55c5d2e95 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -18,9 +18,9 @@ import (
 	"github.com/lxc/lxd/shared/api"
 	"github.com/lxc/lxd/shared/log15"
 	"github.com/lxc/lxd/shared/logger"
-	"github.com/lxc/lxd/shared/osarch"
 	"github.com/lxc/lxd/shared/version"
 	"github.com/pkg/errors"
+	"github.com/lxc/lxd/shared/osarch"
 )
 
 // Bootstrap turns a non-clustered LXD instance into the first (and leader)
@@ -156,7 +156,7 @@ func Bootstrap(state *state.State, gateway *Gateway, name string) error {
 //
 // Return an updated list raft database nodes (possibly including the newly
 // accepted node).
-func Accept(state *state.State, gateway *Gateway, name, address string, schema, api int) ([]db.RaftNode, error) {
+func Accept(state *state.State, gateway *Gateway, name, address string, schema, api, arch int) ([]db.RaftNode, error) {
 	// Check parameters
 	if name == "" {
 		return nil, fmt.Errorf("node name must not be empty")
@@ -173,13 +173,6 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
 			return err
 		}
 
-		// TODO: when fixing #6380 this should be replaced with the
-		// actual architecture of the foreign node.
-		arch, err := osarch.ArchitectureGetLocalID()
-		if err != nil {
-			return err
-		}
-
 		// Add the new node
 		id, err := tx.NodeAddWithArch(name, address, arch)
 		if err != nil {
@@ -223,7 +216,6 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
 			return nil, errors.Wrap(err, "Failed to insert new node into raft_nodes")
 		}
 	}
-
 	return nodes, nil
 }
 
@@ -818,6 +810,10 @@ func List(state *state.State) ([]api.ClusterMember, error) {
 		result[i].URL = fmt.Sprintf("https://%s", node.Address)
 		result[i].Database = shared.StringInSlice(string(db.ClusterRoleDatabase), node.Roles)
 		result[i].Roles = node.Roles
+		result[i].Arch, err = osarch.ArchitectureName(node.Arch)
+		if err != nil {
+			return nil, err
+		}
 		if node.IsOffline(offlineThreshold) {
 			result[i].Status = "Offline"
 			result[i].Message = fmt.Sprintf(

From a15b35a05e1eec6e95fa742dec32db9b92277a81 Mon Sep 17 00:00:00 2001
From: Dinah Baum <dinahbaum123 at gmail.com>
Date: Thu, 5 Dec 2019 01:35:20 -0600
Subject: [PATCH 5/7] lxd/db: add Arch to NodeInfo

Signed-off-by: Dinah Baum <dinahbaum123 at gmail.com>
---
 lxd/db/node.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lxd/db/node.go b/lxd/db/node.go
index 7bbf423f3a..a4bd2c9bd9 100644
--- a/lxd/db/node.go
+++ b/lxd/db/node.go
@@ -37,6 +37,7 @@ type NodeInfo struct {
 	APIExtensions int       // Number of API extensions of the LXD code running on the node
 	Heartbeat     time.Time // Timestamp of the last heartbeat
 	Roles         []string  // List of cluster roles
+	Arch          int       // Node architecture
 }
 
 // IsOffline returns true if the last successful heartbeat time of the node is
@@ -270,6 +271,7 @@ func (c *ClusterTx) nodes(pending bool, where string, args ...interface{}) ([]No
 			&nodes[i].Schema,
 			&nodes[i].APIExtensions,
 			&nodes[i].Heartbeat,
+			&nodes[i].Arch,
 		}
 	}
 	if pending {
@@ -279,7 +281,7 @@ func (c *ClusterTx) nodes(pending bool, where string, args ...interface{}) ([]No
 	}
 
 	// Get the node entries
-	sql = "SELECT id, name, address, description, schema, api_extensions, heartbeat FROM nodes WHERE pending=?"
+	sql = "SELECT id, name, address, description, schema, api_extensions, heartbeat, arch FROM nodes WHERE pending=?"
 	if where != "" {
 		sql += fmt.Sprintf("AND %s ", where)
 	}
@@ -580,6 +582,7 @@ func (c *ClusterTx) NodeOfflineThreshold() (time.Duration, error) {
 // the least number of containers (either already created or being created with
 // an operation).
 func (c *ClusterTx) NodeWithLeastContainers() (string, error) {
+	fmt.Println("NodeWithLeastContainers")
 	threshold, err := c.NodeOfflineThreshold()
 	if err != nil {
 		return "", errors.Wrap(err, "failed to get offline threshold")

From a874b95e1da78193d02181451a1859939a14b006 Mon Sep 17 00:00:00 2001
From: Dinah Baum <dinahbaum123 at gmail.com>
Date: Thu, 5 Dec 2019 01:37:28 -0600
Subject: [PATCH 6/7] shared/api: Add Arch

Signed-off-by: Dinah Baum <dinahbaum123 at gmail.com>
---
 shared/api/cluster.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/shared/api/cluster.go b/shared/api/cluster.go
index c773929c8d..ed86786f2d 100644
--- a/shared/api/cluster.go
+++ b/shared/api/cluster.go
@@ -60,4 +60,5 @@ type ClusterMember struct {
 
 	// API extension: clustering_roles
 	Roles []string `json:"roles" yaml:"roles"`
+	Arch    string `json:"arch" yaml:"arch"`
 }

From 5bb4dbe117696ca83a4f6fd58a003ca18a1f9149 Mon Sep 17 00:00:00 2001
From: Dinah Baum <dinahbaum123 at gmail.com>
Date: Thu, 5 Dec 2019 01:41:09 -0600
Subject: [PATCH 7/7] lxd: AcceptMember sets a node's architecture

Signed-off-by: Dinah Baum <dinahbaum123 at gmail.com>
---
 lxd/api_cluster.go | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go
index 209fb08ed4..4c8075ad3c 100644
--- a/lxd/api_cluster.go
+++ b/lxd/api_cluster.go
@@ -27,6 +27,7 @@ import (
 	"github.com/lxc/lxd/shared"
 	"github.com/lxc/lxd/shared/api"
 	"github.com/lxc/lxd/shared/logger"
+	"github.com/lxc/lxd/shared/osarch"
 	"github.com/lxc/lxd/shared/version"
 )
 
@@ -608,11 +609,10 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
 		// Add the cluster flag from the agent
 		version.UserAgentFeatures([]string{"cluster"})
 
-		client, err = cluster.Connect(req.ClusterAddress, d.endpoints.NetworkCert(), true)
+		client, err = cluster.Connect(req.ClusterAddress, d.endpoints.NetworkCert(), false)
 		if err != nil {
 			return err
 		}
-
 		err = clusterRebalance(client)
 		if err != nil {
 			return err
@@ -807,7 +807,10 @@ func clusterAcceptMember(
 	client lxd.InstanceServer,
 	name, address string, schema, apiExt int,
 	pools []api.StoragePool, networks []api.Network) (*internalClusterPostAcceptResponse, error) {
-
+	var arch, err = osarch.ArchitectureGetLocalID()
+	if err != nil {
+		return nil, err
+	}
 	req := internalClusterPostAcceptRequest{
 		Name:         name,
 		Address:      address,
@@ -815,6 +818,7 @@ func clusterAcceptMember(
 		API:          apiExt,
 		StoragePools: pools,
 		Networks:     networks,
+		Arch:         arch,
 	}
 	info := &internalClusterPostAcceptResponse{}
 	resp, _, err := client.RawQuery("POST", "/internal/cluster/accept", req, "")
@@ -923,12 +927,10 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
 		if err != nil {
 			return response.SmartError(err)
 		}
-
 		networks, err := d.cluster.Networks()
 		if err != nil {
 			return response.SmartError(err)
 		}
-
 		for _, name := range networks {
 			err := client.DeleteNetwork(name)
 			if err != nil {
@@ -941,7 +943,6 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
 		if err != nil && err != db.ErrNoSuchObject {
 			return response.SmartError(err)
 		}
-
 		for _, name := range pools {
 			err := client.DeleteStoragePool(name)
 			if err != nil {
@@ -965,11 +966,10 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
 	if force != 1 {
 		// Try to gracefully reset the database on the node.
 		cert := d.endpoints.NetworkCert()
-		client, err := cluster.Connect(address, cert, true)
+		client, err := cluster.Connect(address, cert, false)
 		if err != nil {
 			return response.SmartError(err)
 		}
-
 		put := api.ClusterPut{}
 		put.Enabled = false
 		_, err = client.UpdateCluster(put, "")
@@ -987,19 +987,17 @@ func tryClusterRebalance(d *Daemon) error {
 	leader, err := d.gateway.LeaderAddress()
 	if err != nil {
 		// This is not a fatal error, so let's just log it.
-		return errors.Wrap(err, "Failed to get current leader member")
+		return errors.Wrap(err, "failed to get current leader member")
 	}
 	cert := d.endpoints.NetworkCert()
 	client, err := cluster.Connect(leader, cert, true)
 	if err != nil {
-		return errors.Wrap(err, "Failed to connect to leader member")
+		return errors.Wrap(err, "failed to connect to leader member")
 	}
-
 	_, _, err = client.RawQuery("POST", "/internal/cluster/rebalance", nil, "")
 	if err != nil {
-		return errors.Wrap(err, "Request to rebalance cluster failed")
+		return errors.Wrap(err, "request to rebalance cluster failed")
 	}
-
 	return nil
 }
 
@@ -1048,7 +1046,7 @@ func internalClusterPostAccept(d *Daemon, r *http.Request) response.Response {
 		return response.SmartError(err)
 	}
 
-	nodes, err := cluster.Accept(d.State(), d.gateway, req.Name, req.Address, req.Schema, req.API)
+	nodes, err := cluster.Accept(d.State(), d.gateway, req.Name, req.Address, req.Schema, req.API, req.Arch)
 	if err != nil {
 		return response.BadRequest(err)
 	}
@@ -1071,6 +1069,7 @@ type internalClusterPostAcceptRequest struct {
 	API          int               `json:"api" yaml:"api"`
 	StoragePools []api.StoragePool `json:"storage_pools" yaml:"storage_pools"`
 	Networks     []api.Network     `json:"networks" yaml:"networks"`
+	Arch         int               `json:"arch" yaml:"arch"`
 }
 
 // A Response for the /internal/cluster/accept endpoint.
@@ -1162,11 +1161,10 @@ func internalClusterPostRebalance(d *Daemon, r *http.Request) response.Response
 	}
 
 	cert := d.endpoints.NetworkCert()
-	client, err := cluster.Connect(address, cert, true)
+	client, err := cluster.Connect(address, cert, false)
 	if err != nil {
 		return response.SmartError(err)
 	}
-
 	_, _, err = client.RawQuery("POST", "/internal/cluster/promote", post, "")
 	if err != nil {
 		return response.SmartError(err)


More information about the lxc-devel mailing list