[lxc-devel] [lxd/master] Fix some clustering errors
stgraber on Github
lxc-bot at linuxcontainers.org
Tue Oct 15 02:08:13 UTC 2019
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20191014/8177f8f6/attachment.bin>
-------------- next part --------------
From 525235e2609a8d0605b0c03709559771c8cffcb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 14 Oct 2019 18:01:29 -0400
Subject: [PATCH 1/2] lxd/cluster: Tweak joining error messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/api_cluster.go | 46 ++++++++++++++++++----------------
lxd/cluster/membership.go | 27 ++++++++++++--------
lxd/cluster/membership_test.go | 10 ++++----
3 files changed, 47 insertions(+), 36 deletions(-)
diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go
index fee3485d11..bb5457104b 100644
--- a/lxd/api_cluster.go
+++ b/lxd/api_cluster.go
@@ -232,7 +232,7 @@ func clusterPutBootstrap(d *Daemon, req api.ClusterPut) response.Response {
d.db.Transaction(func(tx *db.NodeTx) error {
config, err := node.ConfigLoad(tx)
if err != nil {
- return errors.Wrap(err, "Fetch node configuration")
+ return errors.Wrap(err, "Failed to fetch member configuration")
}
clusterAddress := config.ClusterAddress()
@@ -266,13 +266,14 @@ func clusterPutBootstrap(d *Daemon, req api.ClusterPut) response.Response {
func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
// Make sure basic pre-conditions are met.
if len(req.ClusterCertificate) == 0 {
- return response.BadRequest(fmt.Errorf("No target cluster node certificate provided"))
+ return response.BadRequest(fmt.Errorf("No target cluster member certificate provided"))
}
clusterAddress, err := node.ClusterAddress(d.db)
if err != nil {
return response.SmartError(err)
}
+
if clusterAddress != "" {
return response.BadRequest(fmt.Errorf("This server is already clustered"))
}
@@ -284,7 +285,7 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
if address == "" {
if req.ServerAddress == "" {
- return response.BadRequest(fmt.Errorf("No core.https_address config key is set on this node"))
+ return response.BadRequest(fmt.Errorf("No core.https_address config key is set on this member"))
}
// The user has provided a server address, and no networking
@@ -427,17 +428,17 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
client, req.ServerName, address, cluster.SchemaVersion,
version.APIExtensionsCount(), pools, networks)
if err != nil {
- return errors.Wrap(err, "failed to request to add node")
+ return errors.Wrap(err, "Failed request to add member")
}
// Update our TLS configuration using the returned cluster certificate.
err = util.WriteCert(d.os.VarDir, "cluster", []byte(req.ClusterCertificate), info.PrivateKey, nil)
if err != nil {
- return errors.Wrap(err, "failed to save cluster certificate")
+ return errors.Wrap(err, "Failed to save cluster certificate")
}
cert, err := util.LoadCert(d.os.VarDir)
if err != nil {
- return errors.Wrap(err, "failed to parse cluster certificate")
+ return errors.Wrap(err, "Failed to parse cluster certificate")
}
d.endpoints.NetworkUpdateCert(cert)
@@ -476,7 +477,7 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
err := d.cluster.CertDelete(fingerprint)
if err != nil {
- return errors.Wrap(err, "failed to delete joining node's certificate")
+ return errors.Wrap(err, "Failed to delete joining member's certificate")
}
}
@@ -490,18 +491,21 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
if err != nil {
return err
}
+
if pool.Driver != "ceph" {
continue
}
+
storage, err := storagePoolInit(d.State(), name)
if err != nil {
- return errors.Wrap(err, "failed to init ceph pool for joining node")
+ return errors.Wrap(err, "Failed to init ceph pool for joining member")
}
+
volumeMntPoint := storagedriver.GetStoragePoolVolumeMountPoint(
name, storage.(*storageCeph).volume.Name)
err = os.MkdirAll(volumeMntPoint, 0711)
if err != nil {
- return errors.Wrap(err, "failed to create ceph pool mount point")
+ return errors.Wrap(err, "Failed to create ceph pool mount point")
}
}
@@ -556,7 +560,7 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
go func() {
leader, err := d.gateway.LeaderAddress()
if err != nil {
- logger.Errorf("Failed to get current leader node: %v", err)
+ logger.Errorf("Failed to get current leader member: %v", err)
return
}
var nodeInfo db.NodeInfo
@@ -566,12 +570,12 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
return err
})
if err != nil {
- logger.Errorf("Failed to retrieve the information of leader node: %v", err)
+ logger.Errorf("Failed to retrieve the information of leader member: %v", err)
return
}
imageProjectInfo, err := d.cluster.ImagesGetByNodeID(nodeInfo.ID)
if err != nil {
- logger.Errorf("Failed to retrieve the image fingerprints of leader node: %v", err)
+ logger.Errorf("Failed to retrieve the image fingerprints of leader member: %v", err)
return
}
@@ -639,7 +643,7 @@ func clusterPutDisable(d *Daemon) response.Response {
}
cert, err := util.LoadCert(d.os.VarDir)
if err != nil {
- return response.InternalError(errors.Wrap(err, "failed to parse node certificate"))
+ return response.InternalError(errors.Wrap(err, "Failed to parse member certificate"))
}
// Reset the cluster database and make it local to this node.
@@ -853,7 +857,7 @@ func clusterNodeGet(d *Daemon, r *http.Request) response.Response {
}
}
- return response.NotFound(fmt.Errorf("Node '%s' not found", name))
+ return response.NotFound(fmt.Errorf("Member '%s' not found", name))
}
func clusterNodePost(d *Daemon, r *http.Request) response.Response {
@@ -884,7 +888,7 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
}
name := mux.Vars(r)["name"]
- logger.Debugf("Delete node %s from cluster (force=%d)", name, force)
+ logger.Debugf("Deleting member %s from cluster (force=%d)", name, force)
// First check that the node is clear from containers and images and
// make it leave the database cluster, if it's part of it.
@@ -928,7 +932,7 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
// Remove node from the database
err = cluster.Purge(d.cluster, name)
if err != nil {
- return response.SmartError(errors.Wrap(err, "failed to remove node from database"))
+ return response.SmartError(errors.Wrap(err, "Failed to remove member from database"))
}
// Try to notify the leader.
err = tryClusterRebalance(d)
@@ -948,7 +952,7 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
put.Enabled = false
_, err = client.UpdateCluster(put, "")
if err != nil {
- return response.SmartError(errors.Wrap(err, "failed to cleanup the node"))
+ return response.SmartError(errors.Wrap(err, "Failed to cleanup the member"))
}
}
@@ -961,12 +965,12 @@ func tryClusterRebalance(d *Daemon) error {
leader, err := d.gateway.LeaderAddress()
if err != nil {
// This is not a fatal error, so let's just log it.
- return errors.Wrap(err, "failed to get current leader node")
+ return errors.Wrap(err, "failed to get current leader member")
}
cert := d.endpoints.NetworkCert()
client, err := cluster.Connect(leader, cert, true)
if err != nil {
- return errors.Wrap(err, "failed to connect to leader node")
+ return errors.Wrap(err, "failed to connect to leader member")
}
_, _, err = client.RawQuery("POST", "/internal/cluster/rebalance", nil, "")
if err != nil {
@@ -1000,7 +1004,7 @@ func internalClusterPostAccept(d *Daemon, r *http.Request) response.Response {
return response.InternalError(err)
}
if address != leader {
- logger.Debugf("Redirect node accept request to %s", leader)
+ logger.Debugf("Redirect member accept request to %s", leader)
url := &url.URL{
Scheme: "https",
Path: "/internal/cluster/accept",
@@ -1158,7 +1162,7 @@ func internalClusterPostPromote(d *Daemon, r *http.Request) response.Response {
// Sanity checks
if len(req.RaftNodes) == 0 {
- return response.BadRequest(fmt.Errorf("No raft nodes provided"))
+ return response.BadRequest(fmt.Errorf("No raft members provided"))
}
nodes := make([]db.RaftNode, len(req.RaftNodes))
diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index e25b9267a0..b8b4456c1f 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -18,6 +18,7 @@ import (
"github.com/lxc/lxd/shared/api"
"github.com/lxc/lxd/shared/log15"
"github.com/lxc/lxd/shared/logger"
+ "github.com/lxc/lxd/shared/version"
"github.com/pkg/errors"
)
@@ -170,10 +171,11 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
if err != nil {
return err
}
+
// Add the new node
id, err := tx.NodeAdd(name, address)
if err != nil {
- return errors.Wrap(err, "failed to insert new node")
+ return errors.Wrap(err, "Failed to insert new node into the database")
}
// Mark the node as pending, so it will be skipped when
@@ -181,7 +183,7 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
// notifications.
err = tx.NodePending(id, true)
if err != nil {
- return errors.Wrap(err, "failed to mark new node as pending")
+ return errors.Wrap(err, "Failed to mark the new node as pending")
}
return nil
@@ -194,8 +196,9 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
// less than 3 database nodes).
nodes, err := gateway.currentRaftNodes()
if err != nil {
- return nil, errors.Wrap(err, "failed to get raft nodes from the log")
+ return nil, errors.Wrap(err, "Failed to get raft nodes from the log")
}
+
if len(nodes) < membershipMaxRaftNodes {
err = state.Node.Transaction(func(tx *db.NodeTx) error {
id, err := tx.RaftNodeAdd(address)
@@ -206,7 +209,7 @@ func Accept(state *state.State, gateway *Gateway, name, address string, schema,
return nil
})
if err != nil {
- return nil, errors.Wrap(err, "failed to insert new node into raft_nodes")
+ return nil, errors.Wrap(err, "Failed to insert new node into raft_nodes")
}
}
@@ -919,24 +922,28 @@ func membershipCheckClusterStateForBootstrapOrJoin(tx *db.ClusterTx) error {
func membershipCheckClusterStateForAccept(tx *db.ClusterTx, name string, address string, schema int, api int) error {
nodes, err := tx.Nodes()
if err != nil {
- return errors.Wrap(err, "failed to fetch current cluster nodes")
+ return errors.Wrap(err, "Failed to fetch current cluster nodes")
}
+
if len(nodes) == 1 && nodes[0].Address == "0.0.0.0" {
- return fmt.Errorf("clustering not enabled")
+ return fmt.Errorf("Clustering isn't enabled")
}
for _, node := range nodes {
if node.Name == name {
- return fmt.Errorf("cluster already has node with name %s", name)
+ return fmt.Errorf("The cluster already has a member with name: %s", name)
}
+
if node.Address == address {
- return fmt.Errorf("cluster already has node with address %s", address)
+ return fmt.Errorf("The cluster already has a member with address: %s", address)
}
+
if node.Schema != schema {
- return fmt.Errorf("schema version mismatch: cluster has %d", node.Schema)
+ return fmt.Errorf("The joining server version doesn't match (expected %s with DB schema %v)", version.Version, schema)
}
+
if node.APIExtensions != api {
- return fmt.Errorf("API version mismatch: cluster has %d", node.APIExtensions)
+ return fmt.Errorf("The joining server version doesn't match (expected %s with API count %v)", version.Version, api)
}
}
diff --git a/lxd/cluster/membership_test.go b/lxd/cluster/membership_test.go
index 01137857d9..f27b45e998 100644
--- a/lxd/cluster/membership_test.go
+++ b/lxd/cluster/membership_test.go
@@ -150,7 +150,7 @@ func TestAccept_UnmetPreconditions(t *testing.T) {
cluster.SchemaVersion,
len(version.APIExtensions),
func(f *membershipFixtures) {},
- "clustering not enabled",
+ "Clustering isn't enabled",
},
{
"rusp",
@@ -160,7 +160,7 @@ func TestAccept_UnmetPreconditions(t *testing.T) {
func(f *membershipFixtures) {
f.ClusterNode("5.6.7.8:666")
},
- "cluster already has node with name rusp",
+ "The cluster already has a member with name: rusp",
},
{
"buzz",
@@ -170,7 +170,7 @@ func TestAccept_UnmetPreconditions(t *testing.T) {
func(f *membershipFixtures) {
f.ClusterNode("5.6.7.8:666")
},
- "cluster already has node with address 5.6.7.8:666",
+ "The cluster already has a member with address: 5.6.7.8:666",
},
{
"buzz",
@@ -180,7 +180,7 @@ func TestAccept_UnmetPreconditions(t *testing.T) {
func(f *membershipFixtures) {
f.ClusterNode("5.6.7.8:666")
},
- fmt.Sprintf("schema version mismatch: cluster has %d", cluster.SchemaVersion),
+ fmt.Sprintf("The joining server version doesn't match (expected %s with DB schema %d)", version.Version, cluster.SchemaVersion-1),
},
{
"buzz",
@@ -190,7 +190,7 @@ func TestAccept_UnmetPreconditions(t *testing.T) {
func(f *membershipFixtures) {
f.ClusterNode("5.6.7.8:666")
},
- fmt.Sprintf("API version mismatch: cluster has %d", len(version.APIExtensions)),
+ fmt.Sprintf("The joining server version doesn't match (expected %s with API count %d)", version.Version, len(version.APIExtensions)-1),
},
}
for _, c := range cases {
From 530dc5e54a2a2cad98c515f19cb04806f5b305eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 14 Oct 2019 18:37:50 -0400
Subject: [PATCH 2/2] lxd/cluster: Fix already-clustered test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/api_cluster.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go
index bb5457104b..ea2c2d3aa1 100644
--- a/lxd/api_cluster.go
+++ b/lxd/api_cluster.go
@@ -269,12 +269,12 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
return response.BadRequest(fmt.Errorf("No target cluster member certificate provided"))
}
- clusterAddress, err := node.ClusterAddress(d.db)
+ clustered, err := cluster.Enabled(d.db)
if err != nil {
return response.SmartError(err)
}
- if clusterAddress != "" {
+ if clustered {
return response.BadRequest(fmt.Errorf("This server is already clustered"))
}
More information about the lxc-devel
mailing list