[lxc-devel] [lxd/master] Live migration fixes
stgraber on Github
lxc-bot at linuxcontainers.org
Tue Dec 11 03:03:16 UTC 2018
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20181211/435cbf5b/attachment.bin>
-------------- next part --------------
From 0050bb1d4c3bce0ff317a92148ab927b581d8150 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 10 Dec 2018 20:02:52 -0500
Subject: [PATCH 1/4] lxd/storage: Freeze containers during rsync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Closes #5334
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/storage_migration.go | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/lxd/storage_migration.go b/lxd/storage_migration.go
index 063da9ce9d..fcf87d6014 100644
--- a/lxd/storage_migration.go
+++ b/lxd/storage_migration.go
@@ -94,6 +94,17 @@ func (s rsyncStorageSourceDriver) SendWhileRunning(conn *websocket.Conn, op *ope
wrapper := StorageProgressReader(op, "fs_progress", s.container.Name())
state := s.container.DaemonState()
+
+ // Attempt to freeze the container to avoid changing files during transfer
+ if s.container.IsRunning() {
+ err := s.container.Freeze()
+ if err != nil {
+ logger.Errorf("Unable to freeze container during live-migration")
+ } else {
+ defer s.container.Unfreeze()
+ }
+ }
+
return RsyncSend(projectPrefix(s.container.Project(), ctName), shared.AddSlash(s.container.Path()), conn, wrapper, s.rsyncFeatures, bwlimit, state.OS.ExecPath)
}
From d7bf1a5435cf4c54b6ac63a5f07762fd5e65b8ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 10 Dec 2018 22:01:45 -0500
Subject: [PATCH 2/4] lxd/proxy: Improve shutdown code
---
lxd/proxy_device_utils.go | 41 +++++++++------------------------------
1 file changed, 9 insertions(+), 32 deletions(-)
diff --git a/lxd/proxy_device_utils.go b/lxd/proxy_device_utils.go
index d6616bc9b8..a53241d936 100644
--- a/lxd/proxy_device_utils.go
+++ b/lxd/proxy_device_utils.go
@@ -1,14 +1,13 @@
package main
import (
+ "bytes"
"fmt"
"io/ioutil"
"os"
- "path/filepath"
"strconv"
"strings"
"syscall"
- "time"
"github.com/lxc/lxd/shared"
)
@@ -98,15 +97,15 @@ func killProxyProc(pidPath string) error {
return nil
}
- // Check if it's a proxy process
- cmdPath, err := os.Readlink(fmt.Sprintf("/proc/%s/exe", pidString))
+ // Check if it's forkproxy
+ cmdArgs, err := ioutil.ReadFile(fmt.Sprintf("/proc/%s/cmdline", pidString))
if err != nil {
- cmdPath = ""
+ os.Remove(pidPath)
+ return nil
}
- // Deal with deleted paths
- cmdName := filepath.Base(strings.Split(cmdPath, " ")[0])
- if cmdName != "lxd" {
+ cmdFields := strings.Split(string(bytes.TrimRight(cmdArgs, string("\x00"))), string(byte(0)))
+ if len(cmdFields) < 5 || cmdFields[1] != "forkproxy" {
os.Remove(pidPath)
return nil
}
@@ -117,34 +116,12 @@ func killProxyProc(pidPath string) error {
return err
}
- err = syscall.Kill(pidInt, syscall.SIGTERM)
+ // Actually kill the process
+ err = syscall.Kill(pidInt, syscall.SIGKILL)
if err != nil {
return err
}
- go func() {
- for i := 0; i < 6; i++ {
- time.Sleep(500 * time.Millisecond)
- // Check if the process still exists
- if !shared.PathExists(fmt.Sprintf("/proc/%s", pidString)) {
- return
- }
-
- // Check if it's a proxy process
- cmdPath, err := os.Readlink(fmt.Sprintf("/proc/%s/exe", pidString))
- if err != nil {
- cmdPath = ""
- }
-
- // Deal with deleted paths
- cmdName := filepath.Base(strings.Split(cmdPath, " ")[0])
- if cmdName != "lxd" {
- return
- }
- }
- syscall.Kill(pidInt, syscall.SIGKILL)
- }()
-
// Cleanup
os.Remove(pidPath)
return nil
From 40de45091848a93dabff83fd24baef2fcc1d836d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 10 Dec 2018 22:02:00 -0500
Subject: [PATCH 3/4] lxd/migration: Re-spawn proxy devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Closes #5333
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/container_lxc.go | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index e30d8cdc60..0722cf1008 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2457,9 +2457,16 @@ func (c *containerLXC) Start(stateful bool) error {
return errors.Wrap(err, "Start container")
}
- logger.Info("Started container", ctxMap)
+ // Start proxy devices
+ err = c.restartProxyDevices()
+ if err != nil {
+ // Attempt to stop the container
+ c.Stop(false)
+ return err
+ }
- return err
+ logger.Info("Started container", ctxMap)
+ return nil
} else if c.stateful {
/* stateless start required when we have state, let's delete it */
err := os.RemoveAll(c.StatePath())
@@ -5350,6 +5357,16 @@ func (c *containerLXC) Migrate(args *CriuMigrationArgs) error {
logger.Debugf("forkmigrate: %s", line)
}
}
+
+ if migrateErr == nil {
+ // Start proxy devices
+ err = c.restartProxyDevices()
+ if err != nil {
+ // Attempt to stop the container
+ c.Stop(false)
+ return err
+ }
+ }
} else if args.cmd == lxc.MIGRATE_FEATURE_CHECK {
err := c.initLXC(true)
if err != nil {
From 7d59744907bf2d3a3c0719ae2aaed71b714f2817 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Mon, 10 Dec 2018 20:13:46 -0500
Subject: [PATCH 4/4] tests: Improve live-migration tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
test/suites/migration.sh | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/test/suites/migration.sh b/test/suites/migration.sh
index 29e2d17cf5..ad5e0fc668 100644
--- a/test/suites/migration.sh
+++ b/test/suites/migration.sh
@@ -345,20 +345,32 @@ migration() {
return
fi
+ echo "==> CRIU: starting testing live-migration"
lxc_remote launch testimage l1:migratee
- # let the container do some interesting things
+ # Wait for the container to be done booting
sleep 1
+ # Test stateful stop
lxc_remote stop --stateful l1:migratee
lxc_remote start l1:migratee
+
+ # Test stateful snapshots
lxc_remote snapshot --stateful l1:migratee
- lxc_remote stop -f l1:migratee
- lxc_remote copy l1:migratee/snap0 l2:migratee
- ! lxc_remote copy l1:migratee/snap0 l2:migratee-new-name
- lxc_remote copy --stateless l1:migratee/snap0 l2:migratee-new-name
+ lxc_remote restore l1:migratee snap0
+
+ # Test live migration of container
+ lxc_remote move l1:migratee l2:migratee
+
+ # Test copy of stateful snapshot
+ lxc_remote copy l2:migratee/snap0 l1:migratee
+ ! lxc_remote copy l2:migratee/snap0 l1:migratee-new-name
+
+ # Test stateless copies
+ lxc_remote copy --stateless l2:migratee/snap0 l1:migratee-new-name
+ # Cleanup
lxc_remote delete --force l1:migratee
lxc_remote delete --force l2:migratee
- lxc_remote delete --force l2:migratee-new-name
+ lxc_remote delete --force l1:migratee-new-name
}
More information about the lxc-devel
mailing list