[lxc-devel] [lxd/master] Fix clustered ceph handling and test tweaks

stgraber on GitHub lxc-bot at linuxcontainers.org
Wed Dec 4 23:43:10 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20191204/1b64c0e3/attachment-0001.bin>
-------------- next part --------------
From 6af0be605355423069f1463853434eea6b3b835c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Wed, 4 Dec 2019 17:35:01 -0500
Subject: [PATCH 1/3] lxd/cluster: Fix handling of ceph/cephfs on join
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #6537

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/api_cluster.go        | 32 ++++++++++++++++----------------
 lxd/cluster/membership.go |  4 ++--
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go
index 209fb08ed4..df19253417 100644
--- a/lxd/api_cluster.go
+++ b/lxd/api_cluster.go
@@ -22,7 +22,6 @@ import (
 	"github.com/lxc/lxd/lxd/node"
 	"github.com/lxc/lxd/lxd/operations"
 	"github.com/lxc/lxd/lxd/response"
-	storagedriver "github.com/lxc/lxd/lxd/storage"
 	"github.com/lxc/lxd/lxd/util"
 	"github.com/lxc/lxd/shared"
 	"github.com/lxc/lxd/shared/api"
@@ -467,8 +466,7 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
 			return err
 		}
 
-		// Remove the our old server certificate from the trust store,
-		// since it's not needed anymore.
+		// Remove our old server certificate from the trust store, since it's not needed anymore.
 		_, err = d.cluster.CertificateGet(fingerprint)
 		if err != db.ErrNoSuchObject {
 			if err != nil {
@@ -481,31 +479,31 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) response.Response {
 			}
 		}
 
-		// For ceph pools we have to create the mount points too.
+		// For ceph pools we have to trigger the local mountpoint creation too.
 		poolNames, err = d.cluster.StoragePools()
 		if err != nil && err != db.ErrNoSuchObject {
 			return err
 		}
+
 		for _, name := range poolNames {
-			_, pool, err := d.cluster.StoragePoolGet(name)
+			id, pool, err := d.cluster.StoragePoolGet(name)
 			if err != nil {
 				return err
 			}
 
-			if pool.Driver != "ceph" {
+			if !shared.StringInSlice(pool.Driver, []string{"ceph", "cephfs"}) {
 				continue
 			}
 
-			storage, err := storagePoolInit(d.State(), name)
-			if err != nil {
-				return errors.Wrap(err, "Failed to init ceph pool for joining member")
-			}
+			// Re-assemble a StoragePoolsPost
+			req := api.StoragePoolsPost{}
+			req.StoragePoolPut = pool.StoragePoolPut
+			req.Name = pool.Name
+			req.Driver = pool.Driver
 
-			volumeMntPoint := storagedriver.GetStoragePoolVolumeMountPoint(
-				name, storage.(*storageCeph).volume.Name)
-			err = os.MkdirAll(volumeMntPoint, 0711)
+			_, err = storagePoolCreateLocal(d.State(), id, req, true)
 			if err != nil {
-				return errors.Wrap(err, "Failed to create ceph pool mount point")
+				return errors.Wrap(err, "Failed to init ceph/cephfs pool for joining member")
 			}
 		}
 
@@ -712,7 +710,7 @@ func clusterInitMember(d, client lxd.InstanceServer, memberConfig []api.ClusterM
 
 		// Skip ceph pools since they have no node-specific key and
 		// don't need to be defined on joining nodes.
-		if pool.Driver == "ceph" {
+		if shared.StringInSlice(pool.Driver, []string{"ceph", "cephfs"}) {
 			continue
 		}
 
@@ -1240,12 +1238,14 @@ func clusterCheckStoragePoolsMatch(cluster *db.Cluster, reqPools []api.StoragePo
 			if err != nil {
 				return err
 			}
+
 			// Ignore missing ceph pools, since they'll be shared
 			// and we don't require them to be defined on the
 			// joining node.
-			if pool.Driver == "ceph" {
+			if shared.StringInSlice(pool.Driver, []string{"ceph", "cephfs"}) {
 				continue
 			}
+
 			return fmt.Errorf("Missing storage pool %s", name)
 		}
 	}
diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index b572400553..f2bd48eb72 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -396,14 +396,14 @@ func Join(state *state.State, gateway *Gateway, cert *shared.CertInfo, name stri
 			if err != nil {
 				return errors.Wrap(err, "failed to get storage pool driver")
 			}
-			if driver == "ceph" {
+
+			if shared.StringInSlice(driver, []string{"ceph", "cephfs"}) {
 				// For ceph pools we have to create volume
 				// entries for the joining node.
 				err := tx.StoragePoolNodeJoinCeph(id, node.ID)
 				if err != nil {
 					return errors.Wrap(err, "failed to create ceph volumes for joining node")
 				}
-
 			} else {
 				// For other pools we add the config provided by the joining node.
 				config, ok := pools[name]

From fc773f8edfc1fde56b70fafec599a077584bc66c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Wed, 4 Dec 2019 17:55:07 -0500
Subject: [PATCH 2/3] tests: Always use force with stop/restart
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 test/suites/container_devices_disk.sh       | 4 ++--
 test/suites/container_devices_nic_ipvlan.sh | 2 +-
 test/suites/container_devices_nic_routed.sh | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/suites/container_devices_disk.sh b/test/suites/container_devices_disk.sh
index faf277a9ed..da5f200a61 100644
--- a/test/suites/container_devices_disk.sh
+++ b/test/suites/container_devices_disk.sh
@@ -109,7 +109,7 @@ test_container_devices_disk_ceph() {
   lxc launch testimage ceph-disk -c security.privileged=true
   lxc config device add ceph-disk rbd disk source=ceph:"${RBD_POOL_NAME}"/my-volume ceph.user_name=admin ceph.cluster_name=ceph path=/ceph
   lxc exec ceph-disk -- stat /ceph/lost+found
-  lxc restart ceph-disk
+  lxc restart ceph-disk --force
   lxc exec ceph-disk -- stat /ceph/lost+found
   lxc delete -f ceph-disk
 }
@@ -126,7 +126,7 @@ test_container_devices_disk_cephfs() {
   lxc launch testimage ceph-fs -c security.privileged=true
   lxc config device add ceph-fs fs disk source=cephfs:"${LXD_CEPH_CEPHFS}"/ ceph.user_name=admin ceph.cluster_name=ceph path=/cephfs
   lxc exec ceph-fs -- stat /cephfs
-  lxc restart ceph-fs
+  lxc restart ceph-fs --force
   lxc exec ceph-fs -- stat /cephfs
   lxc delete -f ceph-fs
 }
diff --git a/test/suites/container_devices_nic_ipvlan.sh b/test/suites/container_devices_nic_ipvlan.sh
index 5018f7adc7..3e82618ea1 100644
--- a/test/suites/container_devices_nic_ipvlan.sh
+++ b/test/suites/container_devices_nic_ipvlan.sh
@@ -35,7 +35,7 @@ test_container_devices_nic_ipvlan() {
     false
   fi
 
-  lxc stop "${ctName}"
+  lxc stop "${ctName}" --force
 
   # Check that MTU is inherited from parent device when not specified on device.
   ip link set "${ctName}" mtu 1405
diff --git a/test/suites/container_devices_nic_routed.sh b/test/suites/container_devices_nic_routed.sh
index f53f1d62ef..4cdb074e3e 100644
--- a/test/suites/container_devices_nic_routed.sh
+++ b/test/suites/container_devices_nic_routed.sh
@@ -66,7 +66,7 @@ test_container_devices_nic_routed() {
     false
   fi
 
-  lxc stop "${ctName}"
+  lxc stop "${ctName}" --force
 
   # Check that MTU is inherited from parent device when not specified on device.
   ip link set "${ctName}" mtu 1605

From 0109af6d3f63e62d80589ead3d2c5c51ab841ab2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Wed, 4 Dec 2019 18:42:50 -0500
Subject: [PATCH 3/3] tests: Tighten sleep calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 test/suites/clustering.sh                    | 8 ++++----
 test/suites/container_devices_nic_bridged.sh | 2 +-
 test/suites/container_devices_nic_routed.sh  | 9 ++++++---
 test/suites/devlxd.sh                        | 4 ++--
 test/suites/fdleak.sh                        | 4 ++--
 5 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/test/suites/clustering.sh b/test/suites/clustering.sh
index ff55d2f613..74c18ab2ce 100644
--- a/test/suites/clustering.sh
+++ b/test/suites/clustering.sh
@@ -123,7 +123,7 @@ test_clustering_membership() {
   # detected as down.
   LXD_DIR="${LXD_ONE_DIR}" lxc config set cluster.offline_threshold 12
   LXD_DIR="${LXD_THREE_DIR}" lxd shutdown
-  sleep 30
+  sleep 15
   LXD_DIR="${LXD_TWO_DIR}" lxc cluster list
   #| grep "node3" | grep -q "OFFLINE"
 
@@ -284,7 +284,7 @@ test_clustering_containers() {
   # containers.
   LXD_DIR="${LXD_THREE_DIR}" lxc config set cluster.offline_threshold 12
   LXD_DIR="${LXD_TWO_DIR}" lxd shutdown
-  sleep 30
+  sleep 15
   LXD_DIR="${LXD_ONE_DIR}" lxc list | grep foo | grep -q ERROR
   LXD_DIR="${LXD_ONE_DIR}" lxc config set cluster.offline_threshold 20
 
@@ -470,7 +470,7 @@ test_clustering_storage() {
     # Shutdown node 3, and wait for it to be considered offline.
     LXD_DIR="${LXD_THREE_DIR}" lxc config set cluster.offline_threshold 12
     LXD_DIR="${LXD_THREE_DIR}" lxd shutdown
-    sleep 30
+    sleep 15
 
     # Move the container back to node2, even if node3 is offline
     LXD_DIR="${LXD_ONE_DIR}" lxc move bar --target node2
@@ -985,7 +985,7 @@ test_clustering_shutdown_nodes() {
   wait "$(cat three.pid)"
 
   # Make sure the database is not available to the first node
-  sleep 30
+  sleep 15
   LXD_DIR="${LXD_ONE_DIR}" lxd shutdown
 
   # Wait for LXD to terminate, otherwise the db will not be empty, and the
diff --git a/test/suites/container_devices_nic_bridged.sh b/test/suites/container_devices_nic_bridged.sh
index 0363dca052..bcbadf933b 100644
--- a/test/suites/container_devices_nic_bridged.sh
+++ b/test/suites/container_devices_nic_bridged.sh
@@ -84,7 +84,7 @@ test_container_devices_nic_bridged() {
   lxc exec "${ctName}" -- ip -4 route add default dev eth0
   ping -c2 -W1 "192.0.2.1${ipRand}"
   lxc exec "${ctName}" -- ip -6 addr add "2001:db8::1${ipRand}/128" dev eth0
-  sleep 2 #Wait for link local gateway advert.
+  sleep 2 # Wait for link local gateway advert.
   ping6 -c2 -W1 "2001:db8::1${ipRand}"
 
   # Test hot plugging a container nic with different settings to profile with the same name.
diff --git a/test/suites/container_devices_nic_routed.sh b/test/suites/container_devices_nic_routed.sh
index 4cdb074e3e..ddb3654c60 100644
--- a/test/suites/container_devices_nic_routed.sh
+++ b/test/suites/container_devices_nic_routed.sh
@@ -30,7 +30,8 @@ test_container_devices_nic_routed() {
     if ! ip -6 a show dev "${ctName}" | grep "tentative" ; then
       break
     fi
-    sleep 2
+
+    sleep 0.5
   done
 
   # Create dummy vlan parent.
@@ -96,7 +97,8 @@ test_container_devices_nic_routed() {
     if ! lxc exec "${ctName}" -- ip -6 a show dev eth0 | grep "tentative" ; then
       break
     fi
-    sleep 2
+
+    sleep 0.5
   done
 
   while true
@@ -104,7 +106,8 @@ test_container_devices_nic_routed() {
     if ! lxc exec "${ctName}2" -- ip -6 a show dev eth0 | grep "tentative" ; then
       break
     fi
-    sleep 2
+
+    sleep 0.5
   done
 
   # Check comms between containers.
diff --git a/test/suites/devlxd.sh b/test/suites/devlxd.sh
index ae6f890605..e4ce296eee 100644
--- a/test/suites/devlxd.sh
+++ b/test/suites/devlxd.sh
@@ -60,7 +60,7 @@ EOF
   MATCH=0
 
   # shellcheck disable=SC2034
-  for i in $(seq 5); do
+  for i in $(seq 10); do
     lxc config set devlxd user.foo bar
     lxc config set devlxd security.nesting true
 
@@ -72,7 +72,7 @@ EOF
     lxc config device remove devlxd mnt
 
     if [ "$(tr -d '\0' < "${TEST_DIR}/devlxd.log" | md5sum | cut -d' ' -f1)" != "$(md5sum "${TEST_DIR}/devlxd.expected" | cut -d' ' -f1)" ]; then
-      sleep 1
+      sleep 0.5
       continue
     fi
 
diff --git a/test/suites/fdleak.sh b/test/suites/fdleak.sh
index fcb0bc8db8..f7a4d43e88 100644
--- a/test/suites/fdleak.sh
+++ b/test/suites/fdleak.sh
@@ -27,12 +27,12 @@ test_fdleak() {
     exit 0
   )
 
-  for i in $(seq 10); do
+  for i in $(seq 20); do
     afterfds=$(/bin/ls "/proc/${pid}/fd" | wc -l)
     leakedfds=$((afterfds - beforefds))
 
     [ "${leakedfds}" -gt 5 ] || break
-    sleep 1
+    sleep 0.5
   done
 
   bad=0


More information about the lxc-devel mailing list