[lxc-devel] [lxd/master] Live migration fixes

stgraber on Github lxc-bot at linuxcontainers.org
Tue Dec 11 03:03:16 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20181211/435cbf5b/attachment.bin>
-------------- next part --------------
From 0050bb1d4c3bce0ff317a92148ab927b581d8150 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 10 Dec 2018 20:02:52 -0500
Subject: [PATCH 1/4] lxd/storage: Freeze containers during rsync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #5334

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/storage_migration.go | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lxd/storage_migration.go b/lxd/storage_migration.go
index 063da9ce9d..fcf87d6014 100644
--- a/lxd/storage_migration.go
+++ b/lxd/storage_migration.go
@@ -94,6 +94,17 @@ func (s rsyncStorageSourceDriver) SendWhileRunning(conn *websocket.Conn, op *ope
 
 	wrapper := StorageProgressReader(op, "fs_progress", s.container.Name())
 	state := s.container.DaemonState()
+
+	// Attempt to freeze the container to avoid changing files during transfer
+	if s.container.IsRunning() {
+		err := s.container.Freeze()
+		if err != nil {
+			logger.Errorf("Unable to freeze container during live-migration")
+		} else {
+			defer s.container.Unfreeze()
+		}
+	}
+
 	return RsyncSend(projectPrefix(s.container.Project(), ctName), shared.AddSlash(s.container.Path()), conn, wrapper, s.rsyncFeatures, bwlimit, state.OS.ExecPath)
 }
 

From d7bf1a5435cf4c54b6ac63a5f07762fd5e65b8ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 10 Dec 2018 22:01:45 -0500
Subject: [PATCH 2/4] lxd/proxy: Improve shutdown code

---
 lxd/proxy_device_utils.go | 41 +++++++++------------------------------
 1 file changed, 9 insertions(+), 32 deletions(-)

diff --git a/lxd/proxy_device_utils.go b/lxd/proxy_device_utils.go
index d6616bc9b8..a53241d936 100644
--- a/lxd/proxy_device_utils.go
+++ b/lxd/proxy_device_utils.go
@@ -1,14 +1,13 @@
 package main
 
 import (
+	"bytes"
 	"fmt"
 	"io/ioutil"
 	"os"
-	"path/filepath"
 	"strconv"
 	"strings"
 	"syscall"
-	"time"
 
 	"github.com/lxc/lxd/shared"
 )
@@ -98,15 +97,15 @@ func killProxyProc(pidPath string) error {
 		return nil
 	}
 
-	// Check if it's a proxy process
-	cmdPath, err := os.Readlink(fmt.Sprintf("/proc/%s/exe", pidString))
+	// Check if it's forkproxy
+	cmdArgs, err := ioutil.ReadFile(fmt.Sprintf("/proc/%s/cmdline", pidString))
 	if err != nil {
-		cmdPath = ""
+		os.Remove(pidPath)
+		return nil
 	}
 
-	// Deal with deleted paths
-	cmdName := filepath.Base(strings.Split(cmdPath, " ")[0])
-	if cmdName != "lxd" {
+	cmdFields := strings.Split(string(bytes.TrimRight(cmdArgs, string("\x00"))), string(byte(0)))
+	if len(cmdFields) < 5 || cmdFields[1] != "forkproxy" {
 		os.Remove(pidPath)
 		return nil
 	}
@@ -117,34 +116,12 @@ func killProxyProc(pidPath string) error {
 		return err
 	}
 
-	err = syscall.Kill(pidInt, syscall.SIGTERM)
+	// Actually kill the process
+	err = syscall.Kill(pidInt, syscall.SIGKILL)
 	if err != nil {
 		return err
 	}
 
-	go func() {
-		for i := 0; i < 6; i++ {
-			time.Sleep(500 * time.Millisecond)
-			// Check if the process still exists
-			if !shared.PathExists(fmt.Sprintf("/proc/%s", pidString)) {
-				return
-			}
-
-			// Check if it's a proxy process
-			cmdPath, err := os.Readlink(fmt.Sprintf("/proc/%s/exe", pidString))
-			if err != nil {
-				cmdPath = ""
-			}
-
-			// Deal with deleted paths
-			cmdName := filepath.Base(strings.Split(cmdPath, " ")[0])
-			if cmdName != "lxd" {
-				return
-			}
-		}
-		syscall.Kill(pidInt, syscall.SIGKILL)
-	}()
-
 	// Cleanup
 	os.Remove(pidPath)
 	return nil

From 40de45091848a93dabff83fd24baef2fcc1d836d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 10 Dec 2018 22:02:00 -0500
Subject: [PATCH 3/4] lxd/migration: Re-spawn proxy devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #5333

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index e30d8cdc60..0722cf1008 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2457,9 +2457,16 @@ func (c *containerLXC) Start(stateful bool) error {
 			return errors.Wrap(err, "Start container")
 		}
 
-		logger.Info("Started container", ctxMap)
+		// Start proxy devices
+		err = c.restartProxyDevices()
+		if err != nil {
+			// Attempt to stop the container
+			c.Stop(false)
+			return err
+		}
 
-		return err
+		logger.Info("Started container", ctxMap)
+		return nil
 	} else if c.stateful {
 		/* stateless start required when we have state, let's delete it */
 		err := os.RemoveAll(c.StatePath())
@@ -5350,6 +5357,16 @@ func (c *containerLXC) Migrate(args *CriuMigrationArgs) error {
 				logger.Debugf("forkmigrate: %s", line)
 			}
 		}
+
+		if migrateErr == nil {
+			// Start proxy devices
+			err = c.restartProxyDevices()
+			if err != nil {
+				// Attempt to stop the container
+				c.Stop(false)
+				return err
+			}
+		}
 	} else if args.cmd == lxc.MIGRATE_FEATURE_CHECK {
 		err := c.initLXC(true)
 		if err != nil {

From 7d59744907bf2d3a3c0719ae2aaed71b714f2817 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 10 Dec 2018 20:13:46 -0500
Subject: [PATCH 4/4] tests: Improve live-migration tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 test/suites/migration.sh | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/test/suites/migration.sh b/test/suites/migration.sh
index 29e2d17cf5..ad5e0fc668 100644
--- a/test/suites/migration.sh
+++ b/test/suites/migration.sh
@@ -345,20 +345,32 @@ migration() {
     return
   fi
 
+  echo "==> CRIU: starting testing live-migration"
   lxc_remote launch testimage l1:migratee
 
-  # let the container do some interesting things
+  # Wait for the container to be done booting
   sleep 1
 
+  # Test stateful stop
   lxc_remote stop --stateful l1:migratee
   lxc_remote start l1:migratee
+
+  # Test stateful snapshots
   lxc_remote snapshot --stateful l1:migratee
-  lxc_remote stop -f l1:migratee
-  lxc_remote copy l1:migratee/snap0 l2:migratee
-  ! lxc_remote copy l1:migratee/snap0 l2:migratee-new-name
-  lxc_remote copy --stateless l1:migratee/snap0 l2:migratee-new-name
+  lxc_remote restore l1:migratee snap0
+
+  # Test live migration of container
+  lxc_remote move l1:migratee l2:migratee
+
+  # Test copy of stateful snapshot
+  lxc_remote copy l2:migratee/snap0 l1:migratee
+  ! lxc_remote copy l2:migratee/snap0 l1:migratee-new-name
+
+  # Test stateless copies
+  lxc_remote copy --stateless l2:migratee/snap0 l1:migratee-new-name
 
+  # Cleanup
   lxc_remote delete --force l1:migratee
   lxc_remote delete --force l2:migratee
-  lxc_remote delete --force l2:migratee-new-name
+  lxc_remote delete --force l1:migratee-new-name
 }


More information about the lxc-devel mailing list