[lxc-devel] [lxd/master] Simplify migration code

tych0 on Github lxc-bot at linuxcontainers.org
Tue Jun 14 22:37:51 UTC 2016


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20160614/f5df04a2/attachment.bin>
-------------- next part --------------
From 31f1e20ca1c97e5a0b857502697fb1f9c6a8b5af Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Tue, 14 Jun 2016 17:40:34 +0000
Subject: [PATCH 1/2] simplify checkpoint/restore code everywhere

Some problems:

* We had various entry points for migration, each which collected logs in
  various different and inconsistent ways.
* We also had the StartFromMigrate call, and a Migrate() to which you could
  pass lxc.MIGRATE_RESTORE, which wasn't an obvious API.
* at each point we had a check that did the rootfs shifting if necessary
* we had to do findCriu everywhere manually

Now that we have a Migrate() call, let's just route everything through
that, and handle all of this in a uniform way.

Note that some findCriu calls are still prudent to do e.g. in snapshot
restore, before we actually do all the filesystem work to restore stuff if
the snapshot is stateful. I've left those sorts of calls in.

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 lxd/container.go     |  12 +--
 lxd/container_lxc.go | 246 +++++++++++++++++++++++++++++----------------------
 lxd/migrate.go       |  93 +------------------
 3 files changed, 143 insertions(+), 208 deletions(-)

diff --git a/lxd/container.go b/lxd/container.go
index f4a6307..09a6567 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -11,8 +11,6 @@ import (
 	"gopkg.in/lxc/go-lxc.v2"
 
 	"github.com/lxc/lxd/shared"
-
-	log "gopkg.in/inconshreveable/log15.v2"
 )
 
 // Helper functions
@@ -378,8 +376,7 @@ type container interface {
 
 	// Snapshots & migration
 	Restore(sourceContainer container) error
-	Migrate(cmd uint, stateDir string, stop bool) error
-	StartFromMigration(imagesDir string) error
+	Migrate(cmd uint, stateDir string, function string, stop bool) error
 	Snapshots() ([]container, error)
 
 	// Config handling
@@ -565,12 +562,7 @@ func containerCreateAsSnapshot(d *Daemon, args containerArgs, sourceContainer co
 		 * after snapshotting will fail.
 		 */
 
-		err = sourceContainer.Migrate(lxc.MIGRATE_DUMP, stateDir, false)
-		err2 := CollectCRIULogFile(sourceContainer, stateDir, "snapshot", "dump")
-		if err2 != nil {
-			shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2})
-		}
-
+		err = sourceContainer.Migrate(lxc.MIGRATE_DUMP, stateDir, "snapshot", false)
 		if err != nil {
 			os.RemoveAll(sourceContainer.StatePath())
 			return nil, err
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 2623174..e654e7a 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"archive/tar"
+	"bufio"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -1168,30 +1169,7 @@ func (c *containerLXC) Start(stateful bool) error {
 			return fmt.Errorf("Container has no existing state to restore.")
 		}
 
-		if err := findCriu("snapshot"); err != nil {
-			return err
-		}
-
-		if !c.IsPrivileged() {
-			if err := c.IdmapSet().ShiftRootfs(c.StatePath()); err != nil {
-				return err
-			}
-		}
-
-		out, err := exec.Command(
-			execPath,
-			"forkmigrate",
-			c.name,
-			c.daemon.lxcpath,
-			configPath,
-			c.StatePath()).CombinedOutput()
-		if string(out) != "" {
-			for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
-				shared.Debugf("forkmigrate: %s", line)
-			}
-		}
-		CollectCRIULogFile(c, c.StatePath(), "snapshot", "restore")
-
+		err := c.Migrate(lxc.MIGRATE_RESTORE, c.StatePath(), "snapshot", false)
 		if err != nil && !c.IsRunning() {
 			return err
 		}
@@ -1239,41 +1217,6 @@ func (c *containerLXC) Start(stateful bool) error {
 	return nil
 }
 
-func (c *containerLXC) StartFromMigration(imagesDir string) error {
-	// Run the shared start code
-	configPath, err := c.startCommon()
-	if err != nil {
-		return err
-	}
-
-	// Start the LXC container
-	out, err := exec.Command(
-		execPath,
-		"forkmigrate",
-		c.name,
-		c.daemon.lxcpath,
-		configPath,
-		imagesDir).CombinedOutput()
-
-	if string(out) != "" {
-		for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
-			shared.Debugf("forkmigrate: %s", line)
-		}
-	}
-
-	if err != nil && !c.IsRunning() {
-		return fmt.Errorf(
-			"Error calling 'lxd forkmigrate %s %s %s %s': err='%v'",
-			c.name,
-			c.daemon.lxcpath,
-			filepath.Join(c.LogPath(), "lxc.conf"),
-			imagesDir,
-			err)
-	}
-
-	return nil
-}
-
 func (c *containerLXC) OnStart() error {
 	// Make sure we can't call go-lxc functions by mistake
 	c.fromHook = true
@@ -1382,10 +1325,6 @@ func (c *containerLXC) setupStopping() *sync.WaitGroup {
 func (c *containerLXC) Stop(stateful bool) error {
 	// Handle stateful stop
 	if stateful {
-		if err := findCriu("snapshot"); err != nil {
-			return err
-		}
-
 		// Cleanup any existing state
 		stateDir := c.StatePath()
 		os.RemoveAll(stateDir)
@@ -1396,12 +1335,7 @@ func (c *containerLXC) Stop(stateful bool) error {
 		}
 
 		// Checkpoint
-		err = c.Migrate(lxc.MIGRATE_DUMP, stateDir, true)
-		err2 := CollectCRIULogFile(c, stateDir, "snapshot", "dump")
-		if err2 != nil {
-			shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2})
-		}
-
+		err = c.Migrate(lxc.MIGRATE_DUMP, stateDir, "snapshot", true)
 		if err != nil {
 			return err
 		}
@@ -1774,32 +1708,7 @@ func (c *containerLXC) Restore(sourceContainer container) error {
 	// If the container wasn't running but was stateful, should we restore
 	// it as running?
 	if shared.PathExists(c.StatePath()) {
-		configPath, err := c.startCommon()
-		if err != nil {
-			return err
-		}
-
-		if !c.IsPrivileged() {
-			if err := c.IdmapSet().ShiftRootfs(c.StatePath()); err != nil {
-				return err
-			}
-		}
-
-		out, err := exec.Command(
-			execPath,
-			"forkmigrate",
-			c.name,
-			c.daemon.lxcpath,
-			configPath,
-			c.StatePath()).CombinedOutput()
-		if string(out) != "" {
-			for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
-				shared.Debugf("forkmigrate: %s", line)
-			}
-		}
-		CollectCRIULogFile(c, c.StatePath(), "snapshot", "restore")
-
-		if err != nil {
+		if err := c.Migrate(lxc.MIGRATE_RESTORE, c.StatePath(), "snapshot", false); err != nil {
 			return err
 		}
 
@@ -2724,28 +2633,149 @@ func (c *containerLXC) Export(w io.Writer) error {
 	return tw.Close()
 }
 
-func (c *containerLXC) Migrate(cmd uint, stateDir string, stop bool) error {
-	err := c.initLXC()
+func collectCRIULogFile(c container, imagesDir string, function string, method string) error {
+	t := time.Now().Format(time.RFC3339)
+	newPath := shared.LogPath(c.Name(), fmt.Sprintf("%s_%s_%s.log", function, method, t))
+	return shared.FileCopy(filepath.Join(imagesDir, fmt.Sprintf("%s.log", method)), newPath)
+}
+
+func getCRIULogErrors(imagesDir string, method string) (string, error) {
+	f, err := os.Open(path.Join(imagesDir, fmt.Sprintf("%s.log", method)))
+	if err != nil {
+		return "", err
+	}
+
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	ret := []string{}
+	for scanner.Scan() {
+		line := scanner.Text()
+		if strings.Contains(line, "Error") {
+			ret = append(ret, scanner.Text())
+		}
+	}
+
+	return strings.Join(ret, "\n"), nil
+}
+
+func findCriu(host string) error {
+	_, err := exec.LookPath("criu")
 	if err != nil {
+		return fmt.Errorf("CRIU is required for live migration but its binary couldn't be found on the %s server. Is it installed in LXD's path?", host)
+	}
+
+	return nil
+}
+
+func (c *containerLXC) Migrate(cmd uint, stateDir string, function string, stop bool) error {
+	if err := findCriu(function); err != nil {
 		return err
 	}
 
-	preservesInodes := c.storage.PreservesInodes()
-	/* This feature was only added in 2.0.1, let's not ask for it
-	 * before then or migrations will fail.
+	prettyCmd := ""
+	switch cmd {
+	case lxc.MIGRATE_PRE_DUMP:
+		prettyCmd = "pre-dump"
+	case lxc.MIGRATE_DUMP:
+		prettyCmd = "dump"
+	case lxc.MIGRATE_RESTORE:
+		prettyCmd = "restore"
+	default:
+		prettyCmd = "unknown"
+		shared.Log.Warn("unknown migrate call", log.Ctx{"cmd": cmd})
+	}
+
+	var migrateErr error
+
+	/* For restore, we need an extra fork so that we daemonize monitor
+	 * instead of having it be a child of LXD, so let's hijack the command
+	 * here and do the extra fork.
 	 */
-	if !lxc.VersionAtLeast(2, 0, 1) {
-		preservesInodes = false
+	if cmd == lxc.MIGRATE_RESTORE {
+		// Run the shared start
+		_, err := c.startCommon()
+		if err != nil {
+			return err
+		}
+
+		/*
+		 * For unprivileged containers we need to shift the
+		 * perms on the images images so that they can be
+		 * opened by the process after it is in its user
+		 * namespace.
+		 */
+		if !c.IsPrivileged() {
+			if err := c.IdmapSet().ShiftRootfs(stateDir); err != nil {
+				return err
+			}
+		}
+
+		configPath := filepath.Join(c.LogPath(), "lxc.conf")
+
+		var out []byte
+		out, migrateErr = exec.Command(
+			execPath,
+			"forkmigrate",
+			c.name,
+			c.daemon.lxcpath,
+			configPath,
+			stateDir).CombinedOutput()
+
+		if string(out) != "" {
+			for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
+				shared.Debugf("forkmigrate: %s", line)
+			}
+		}
+
+		if migrateErr != nil && !c.IsRunning() {
+			migrateErr = fmt.Errorf(
+				"Error calling 'lxd forkmigrate %s %s %s %s': err='%v' out='%v'",
+				c.name,
+				c.daemon.lxcpath,
+				filepath.Join(c.LogPath(), "lxc.conf"),
+				stateDir,
+				err,
+				string(out))
+		}
+
+	} else {
+		err := c.initLXC()
+		if err != nil {
+			return err
+		}
+
+		preservesInodes := c.storage.PreservesInodes()
+		/* This feature was only added in 2.0.1, let's not ask for it
+		 * before then or migrations will fail.
+		 */
+		if !lxc.VersionAtLeast(2, 0, 1) {
+			preservesInodes = false
+		}
+
+		opts := lxc.MigrateOptions{
+			Stop:            stop,
+			Directory:       stateDir,
+			Verbose:         true,
+			PreservesInodes: preservesInodes,
+		}
+
+		migrateErr = c.c.Migrate(cmd, opts)
+	}
+
+	collectErr := collectCRIULogFile(c, stateDir, function, prettyCmd)
+	if collectErr != nil {
+		shared.Log.Error("Error collecting checkpoint log file", log.Ctx{"err": collectErr})
 	}
 
-	opts := lxc.MigrateOptions{
-		Stop:            stop,
-		Directory:       stateDir,
-		Verbose:         true,
-		PreservesInodes: preservesInodes,
+	if migrateErr != nil {
+		log, err2 := getCRIULogErrors(stateDir, prettyCmd)
+		if err2 == nil {
+			migrateErr = fmt.Errorf("%s %s failed\n%s", function, prettyCmd, log)
+		}
 	}
 
-	return c.c.Migrate(cmd, opts)
+	return migrateErr
 }
 
 func (c *containerLXC) TemplateApply(trigger string) error {
diff --git a/lxd/migrate.go b/lxd/migrate.go
index 627e6d0..5528aa1 100644
--- a/lxd/migrate.go
+++ b/lxd/migrate.go
@@ -6,18 +6,13 @@
 package main
 
 import (
-	"bufio"
 	"fmt"
 	"io/ioutil"
 	"net/http"
 	"net/url"
 	"os"
-	"os/exec"
-	"path"
-	"path/filepath"
 	"strings"
 	"sync"
-	"time"
 
 	"github.com/golang/protobuf/proto"
 	"github.com/gorilla/websocket"
@@ -71,15 +66,6 @@ func (c *migrationFields) send(m proto.Message) error {
 	return shared.WriteAll(w, data)
 }
 
-func findCriu(host string) error {
-	_, err := exec.LookPath("criu")
-	if err != nil {
-		return fmt.Errorf("CRIU is required for live migration but its binary couldn't be found on the %s server. Is it installed in LXD's path?", host)
-	}
-
-	return nil
-}
-
 func (c *migrationFields) recv(m proto.Message) error {
 	mt, r, err := c.controlConn.NextReader()
 	if err != nil {
@@ -158,32 +144,6 @@ func (c *migrationFields) controlChannel() <-chan MigrationControl {
 	return ch
 }
 
-func CollectCRIULogFile(c container, imagesDir string, function string, method string) error {
-	t := time.Now().Format(time.RFC3339)
-	newPath := shared.LogPath(c.Name(), fmt.Sprintf("%s_%s_%s.log", function, method, t))
-	return shared.FileCopy(filepath.Join(imagesDir, fmt.Sprintf("%s.log", method)), newPath)
-}
-
-func GetCRIULogErrors(imagesDir string, method string) (string, error) {
-	f, err := os.Open(path.Join(imagesDir, fmt.Sprintf("%s.log", method)))
-	if err != nil {
-		return "", err
-	}
-
-	defer f.Close()
-
-	scanner := bufio.NewScanner(f)
-	ret := []string{}
-	for scanner.Scan() {
-		line := scanner.Text()
-		if strings.Contains(line, "Error") {
-			ret = append(ret, scanner.Text())
-		}
-	}
-
-	return strings.Join(ret, "\n"), nil
-}
-
 type migrationSourceWs struct {
 	migrationFields
 
@@ -368,24 +328,8 @@ func (s *migrationSourceWs) Do(op *operation) error {
 		}
 		defer os.RemoveAll(checkpointDir)
 
-		err = s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, true)
-
-		if err2 := CollectCRIULogFile(s.container, checkpointDir, "migration", "dump"); err2 != nil {
-			shared.Debugf("Error collecting checkpoint log file %s", err)
-		}
-
+		err = s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true)
 		if err != nil {
-			driver.Cleanup()
-			log, err2 := GetCRIULogErrors(checkpointDir, "dump")
-
-			/* couldn't find the CRIU log file which means we
-			 * didn't even get that far; give back the liblxc
-			 * error. */
-			if err2 != nil {
-				log = err.Error()
-			}
-
-			err = fmt.Errorf("checkpoint failed:\n%s", log)
 			s.sendControl(err)
 			return err
 		}
@@ -601,36 +545,12 @@ func (c *migrationSink) do() error {
 				return
 			}
 
-			defer func() {
-				err := CollectCRIULogFile(c.container, imagesDir, "migration", "restore")
-				/*
-				 * If the checkpoint fails, we won't have any log to collect,
-				 * so don't warn about that.
-				 */
-				if err != nil && !os.IsNotExist(err) {
-					shared.Debugf("Error collectiong migration log file %s", err)
-				}
-
-				os.RemoveAll(imagesDir)
-			}()
+			defer os.RemoveAll(imagesDir)
 
 			if err := RsyncRecv(shared.AddSlash(imagesDir), c.criuConn); err != nil {
 				restore <- err
 				return
 			}
-
-			/*
-			 * For unprivileged containers we need to shift the
-			 * perms on the images images so that they can be
-			 * opened by the process after it is in its user
-			 * namespace.
-			 */
-			if !c.container.IsPrivileged() {
-				if err := c.container.IdmapSet().ShiftRootfs(imagesDir); err != nil {
-					restore <- err
-					return
-				}
-			}
 		}
 
 		err := <-fsTransfer
@@ -640,15 +560,8 @@ func (c *migrationSink) do() error {
 		}
 
 		if c.live {
-			err := c.container.StartFromMigration(imagesDir)
+			err = c.container.Migrate(lxc.MIGRATE_RESTORE, imagesDir, "migration", false)
 			if err != nil {
-				log, err2 := GetCRIULogErrors(imagesDir, "restore")
-				/* restore failed before CRIU was invoked, give
-				 * back the liblxc error */
-				if err2 != nil {
-					log = err.Error()
-				}
-				err = fmt.Errorf("restore failed:\n%s", log)
 				restore <- err
 				return
 			}

From cfd2d91af6a4d45c8f87ae3ebfba5c13d2126878 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Tue, 14 Jun 2016 18:44:02 +0000
Subject: [PATCH 2/2] pass preservesInodes on migrate restore too

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 lxd/container_lxc.go | 19 ++++++++++---------
 lxd/migrate.go       | 13 ++++++++-----
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index e654e7a..cbd6c99 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2686,6 +2686,14 @@ func (c *containerLXC) Migrate(cmd uint, stateDir string, function string, stop
 		shared.Log.Warn("unknown migrate call", log.Ctx{"cmd": cmd})
 	}
 
+	preservesInodes := c.storage.PreservesInodes()
+	/* This feature was only added in 2.0.1, let's not ask for it
+	 * before then or migrations will fail.
+	 */
+	if !lxc.VersionAtLeast(2, 0, 1) {
+		preservesInodes = false
+	}
+
 	var migrateErr error
 
 	/* For restore, we need an extra fork so that we daemonize monitor
@@ -2720,7 +2728,8 @@ func (c *containerLXC) Migrate(cmd uint, stateDir string, function string, stop
 			c.name,
 			c.daemon.lxcpath,
 			configPath,
-			stateDir).CombinedOutput()
+			stateDir,
+			fmt.Sprintf("%v", preservesInodes)).CombinedOutput()
 
 		if string(out) != "" {
 			for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
@@ -2745,14 +2754,6 @@ func (c *containerLXC) Migrate(cmd uint, stateDir string, function string, stop
 			return err
 		}
 
-		preservesInodes := c.storage.PreservesInodes()
-		/* This feature was only added in 2.0.1, let's not ask for it
-		 * before then or migrations will fail.
-		 */
-		if !lxc.VersionAtLeast(2, 0, 1) {
-			preservesInodes = false
-		}
-
 		opts := lxc.MigrateOptions{
 			Stop:            stop,
 			Directory:       stateDir,
diff --git a/lxd/migrate.go b/lxd/migrate.go
index 5528aa1..651b257 100644
--- a/lxd/migrate.go
+++ b/lxd/migrate.go
@@ -11,6 +11,7 @@ import (
 	"net/http"
 	"net/url"
 	"os"
+	"strconv"
 	"strings"
 	"sync"
 
@@ -606,7 +607,7 @@ func (c *migrationSink) do() error {
 /*
  * Similar to forkstart, this is called when lxd is invoked as:
  *
- *    lxd forkmigrate <container> <lxcpath> <path_to_config> <path_to_criu_images>
+ *    lxd forkmigrate <container> <lxcpath> <path_to_config> <path_to_criu_images> <preserves_inodes>
  *
  * liblxc's restore() sets up the processes in such a way that the monitor ends
  * up being a child of the process that calls it, in our case lxd. However, we
@@ -615,7 +616,7 @@ func (c *migrationSink) do() error {
  * footprint when we fork tasks that will never free golang's memory, etc.)
  */
 func MigrateContainer(args []string) error {
-	if len(args) != 5 {
+	if len(args) != 6 {
 		return fmt.Errorf("Bad arguments %q", args)
 	}
 
@@ -623,6 +624,7 @@ func MigrateContainer(args []string) error {
 	lxcpath := args[2]
 	configPath := args[3]
 	imagesDir := args[4]
+	preservesInodes, err := strconv.ParseBool(args[5])
 
 	c, err := lxc.NewContainer(name, lxcpath)
 	if err != nil {
@@ -638,8 +640,9 @@ func MigrateContainer(args []string) error {
 	os.Stdout.Close()
 	os.Stderr.Close()
 
-	return c.Restore(lxc.RestoreOptions{
-		Directory: imagesDir,
-		Verbose:   true,
+	return c.Migrate(lxc.MIGRATE_RESTORE, lxc.MigrateOptions{
+		Directory:       imagesDir,
+		Verbose:         true,
+		PreservesInodes: preservesInodes,
 	})
 }


More information about the lxc-devel mailing list