[lxc-devel] [lxd/stable-2.0] Stable 2.0/simplify migration code

tych0 on Github lxc-bot at linuxcontainers.org
Wed Jun 15 16:07:31 UTC 2016


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 342 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20160615/7f3ad128/attachment.bin>
-------------- next part --------------
From 26404a098eca76ac1c05cfe2b1acff563d96bc0f Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Wed, 1 Jun 2016 10:40:55 -0600
Subject: [PATCH 1/2] c/r: switch to the new ->migrate API

We'll use this in the next patch to enable use of new liblxc features.

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 lxd/container.go     |  5 ++---
 lxd/container_lxc.go | 14 +++++++++-----
 lxd/migrate.go       |  3 +--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/lxd/container.go b/lxd/container.go
index 9c196ca..cf7884f 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -341,7 +341,7 @@ type container interface {
 
 	// Snapshots & migration
 	Restore(sourceContainer container) error
-	Checkpoint(opts lxc.CheckpointOptions) error
+	Migrate(cmd uint, stateDir string, stop bool) error
 	StartFromMigration(imagesDir string) error
 	Snapshots() ([]container, error)
 
@@ -527,8 +527,7 @@ func containerCreateAsSnapshot(d *Daemon, args containerArgs, sourceContainer co
 		 * after snapshotting will fail.
 		 */
 
-		opts := lxc.CheckpointOptions{Directory: stateDir, Stop: false, Verbose: true}
-		err = sourceContainer.Checkpoint(opts)
+		err = sourceContainer.Migrate(lxc.MIGRATE_DUMP, stateDir, false)
 		err2 := CollectCRIULogFile(sourceContainer, stateDir, "snapshot", "dump")
 		if err2 != nil {
 			shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2})
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 2f48cfa..6f3f117 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1385,8 +1385,7 @@ func (c *containerLXC) Stop(stateful bool) error {
 		}
 
 		// Checkpoint
-		opts := lxc.CheckpointOptions{Directory: stateDir, Stop: true, Verbose: true}
-		err = c.Checkpoint(opts)
+		err = c.Migrate(lxc.MIGRATE_DUMP, stateDir, true)
 		err2 := CollectCRIULogFile(c, stateDir, "snapshot", "dump")
 		if err2 != nil {
 			shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2})
@@ -2712,14 +2711,19 @@ func (c *containerLXC) Export(w io.Writer) error {
 	return tw.Close()
 }
 
-func (c *containerLXC) Checkpoint(opts lxc.CheckpointOptions) error {
-	// Load the go-lxc struct
+func (c *containerLXC) Migrate(cmd uint, stateDir string, stop bool) error {
 	err := c.initLXC()
 	if err != nil {
 		return err
 	}
 
-	return c.c.Checkpoint(opts)
+	opts := lxc.MigrateOptions{
+		Stop: stop,
+		Directory: stateDir,
+		Verbose: true,
+	}
+
+	return c.c.Migrate(cmd, opts)
 }
 
 func (c *containerLXC) TemplateApply(trigger string) error {
diff --git a/lxd/migrate.go b/lxd/migrate.go
index fdaab35..627e6d0 100644
--- a/lxd/migrate.go
+++ b/lxd/migrate.go
@@ -368,8 +368,7 @@ func (s *migrationSourceWs) Do(op *operation) error {
 		}
 		defer os.RemoveAll(checkpointDir)
 
-		opts := lxc.CheckpointOptions{Stop: true, Directory: checkpointDir, Verbose: true}
-		err = s.container.Checkpoint(opts)
+		err = s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, true)
 
 		if err2 := CollectCRIULogFile(s.container, checkpointDir, "migration", "dump"); err2 != nil {
 			shared.Debugf("Error collecting checkpoint log file %s", err)

From aba85baec275f692c4d1adff044df577518a7055 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Tue, 14 Jun 2016 17:40:34 +0000
Subject: [PATCH 2/2] simplify checkpoint/restore code everywhere

Some problems:

* We had various entry points for migration, each which collected logs in
  various different and inconsistent ways.
* We also had the StartFromMigrate call, and a Migrate() to which you could
  pass lxc.MIGRATE_RESTORE, which wasn't an obvious API.
* at each point we had a check that did the rootfs shifting if necessary
* we had to do findCriu everywhere manually

Now that we have a Migrate() call, let's just route everything through
that, and handle all of this in a uniform way.

Note that some findCriu calls are still prudent to do e.g. in snapshot
restore, before we actually do all the filesystem work to restore stuff if
the snapshot is stateful. I've left those sorts of calls in.

Note: this is a modified version (dropping the preserveInodes bits) from
31f1e20ca1c97e5a0b857502697fb1f9c6a8b5af.

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 lxd/container.go     |  12 +--
 lxd/container_lxc.go | 234 +++++++++++++++++++++++++++++----------------------
 lxd/migrate.go       |  93 +-------------------
 3 files changed, 137 insertions(+), 202 deletions(-)

diff --git a/lxd/container.go b/lxd/container.go
index cf7884f..166e53d 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -11,8 +11,6 @@ import (
 	"gopkg.in/lxc/go-lxc.v2"
 
 	"github.com/lxc/lxd/shared"
-
-	log "gopkg.in/inconshreveable/log15.v2"
 )
 
 // Helper functions
@@ -341,8 +339,7 @@ type container interface {
 
 	// Snapshots & migration
 	Restore(sourceContainer container) error
-	Migrate(cmd uint, stateDir string, stop bool) error
-	StartFromMigration(imagesDir string) error
+	Migrate(cmd uint, stateDir string, function string, stop bool) error
 	Snapshots() ([]container, error)
 
 	// Config handling
@@ -527,12 +524,7 @@ func containerCreateAsSnapshot(d *Daemon, args containerArgs, sourceContainer co
 		 * after snapshotting will fail.
 		 */
 
-		err = sourceContainer.Migrate(lxc.MIGRATE_DUMP, stateDir, false)
-		err2 := CollectCRIULogFile(sourceContainer, stateDir, "snapshot", "dump")
-		if err2 != nil {
-			shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2})
-		}
-
+		err = sourceContainer.Migrate(lxc.MIGRATE_DUMP, stateDir, "snapshot", false)
 		if err != nil {
 			os.RemoveAll(sourceContainer.StatePath())
 			return nil, err
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 6f3f117..7505ae2 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"archive/tar"
+	"bufio"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -1157,30 +1158,7 @@ func (c *containerLXC) Start(stateful bool) error {
 			return fmt.Errorf("Container has no existing state to restore.")
 		}
 
-		if err := findCriu("snapshot"); err != nil {
-			return err
-		}
-
-		if !c.IsPrivileged() {
-			if err := c.IdmapSet().ShiftRootfs(c.StatePath()); err != nil {
-				return err
-			}
-		}
-
-		out, err := exec.Command(
-			execPath,
-			"forkmigrate",
-			c.name,
-			c.daemon.lxcpath,
-			configPath,
-			c.StatePath()).CombinedOutput()
-		if string(out) != "" {
-			for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
-				shared.Debugf("forkmigrate: %s", line)
-			}
-		}
-		CollectCRIULogFile(c, c.StatePath(), "snapshot", "restore")
-
+		err := c.Migrate(lxc.MIGRATE_RESTORE, c.StatePath(), "snapshot", false)
 		if err != nil && !c.IsRunning() {
 			return err
 		}
@@ -1228,41 +1206,6 @@ func (c *containerLXC) Start(stateful bool) error {
 	return nil
 }
 
-func (c *containerLXC) StartFromMigration(imagesDir string) error {
-	// Run the shared start code
-	configPath, err := c.startCommon()
-	if err != nil {
-		return err
-	}
-
-	// Start the LXC container
-	out, err := exec.Command(
-		execPath,
-		"forkmigrate",
-		c.name,
-		c.daemon.lxcpath,
-		configPath,
-		imagesDir).CombinedOutput()
-
-	if string(out) != "" {
-		for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
-			shared.Debugf("forkmigrate: %s", line)
-		}
-	}
-
-	if err != nil && !c.IsRunning() {
-		return fmt.Errorf(
-			"Error calling 'lxd forkmigrate %s %s %s %s': err='%v'",
-			c.name,
-			c.daemon.lxcpath,
-			filepath.Join(c.LogPath(), "lxc.conf"),
-			imagesDir,
-			err)
-	}
-
-	return nil
-}
-
 func (c *containerLXC) OnStart() error {
 	// Make sure we can't call go-lxc functions by mistake
 	c.fromHook = true
@@ -1371,10 +1314,6 @@ func (c *containerLXC) setupStopping() *sync.WaitGroup {
 func (c *containerLXC) Stop(stateful bool) error {
 	// Handle stateful stop
 	if stateful {
-		if err := findCriu("snapshot"); err != nil {
-			return err
-		}
-
 		// Cleanup any existing state
 		stateDir := c.StatePath()
 		os.RemoveAll(stateDir)
@@ -1385,12 +1324,7 @@ func (c *containerLXC) Stop(stateful bool) error {
 		}
 
 		// Checkpoint
-		err = c.Migrate(lxc.MIGRATE_DUMP, stateDir, true)
-		err2 := CollectCRIULogFile(c, stateDir, "snapshot", "dump")
-		if err2 != nil {
-			shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2})
-		}
-
+		err = c.Migrate(lxc.MIGRATE_DUMP, stateDir, "snapshot", true)
 		if err != nil {
 			return err
 		}
@@ -1761,32 +1695,7 @@ func (c *containerLXC) Restore(sourceContainer container) error {
 	// If the container wasn't running but was stateful, should we restore
 	// it as running?
 	if shared.PathExists(c.StatePath()) {
-		configPath, err := c.startCommon()
-		if err != nil {
-			return err
-		}
-
-		if !c.IsPrivileged() {
-			if err := c.IdmapSet().ShiftRootfs(c.StatePath()); err != nil {
-				return err
-			}
-		}
-
-		out, err := exec.Command(
-			execPath,
-			"forkmigrate",
-			c.name,
-			c.daemon.lxcpath,
-			configPath,
-			c.StatePath()).CombinedOutput()
-		if string(out) != "" {
-			for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
-				shared.Debugf("forkmigrate: %s", line)
-			}
-		}
-		CollectCRIULogFile(c, c.StatePath(), "snapshot", "restore")
-
-		if err != nil {
+		if err := c.Migrate(lxc.MIGRATE_RESTORE, c.StatePath(), "snapshot", false); err != nil {
 			return err
 		}
 
@@ -2711,19 +2620,140 @@ func (c *containerLXC) Export(w io.Writer) error {
 	return tw.Close()
 }
 
-func (c *containerLXC) Migrate(cmd uint, stateDir string, stop bool) error {
-	err := c.initLXC()
+func collectCRIULogFile(c container, imagesDir string, function string, method string) error {
+	t := time.Now().Format(time.RFC3339)
+	newPath := shared.LogPath(c.Name(), fmt.Sprintf("%s_%s_%s.log", function, method, t))
+	return shared.FileCopy(filepath.Join(imagesDir, fmt.Sprintf("%s.log", method)), newPath)
+}
+
+func getCRIULogErrors(imagesDir string, method string) (string, error) {
+	f, err := os.Open(path.Join(imagesDir, fmt.Sprintf("%s.log", method)))
+	if err != nil {
+		return "", err
+	}
+
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	ret := []string{}
+	for scanner.Scan() {
+		line := scanner.Text()
+		if strings.Contains(line, "Error") {
+			ret = append(ret, scanner.Text())
+		}
+	}
+
+	return strings.Join(ret, "\n"), nil
+}
+
+func findCriu(host string) error {
+	_, err := exec.LookPath("criu")
 	if err != nil {
+		return fmt.Errorf("CRIU is required for live migration but its binary couldn't be found on the %s server. Is it installed in LXD's path?", host)
+	}
+
+	return nil
+}
+
+func (c *containerLXC) Migrate(cmd uint, stateDir string, function string, stop bool) error {
+	if err := findCriu(function); err != nil {
 		return err
 	}
 
-	opts := lxc.MigrateOptions{
-		Stop: stop,
-		Directory: stateDir,
-		Verbose: true,
+	prettyCmd := ""
+	switch cmd {
+	case lxc.MIGRATE_PRE_DUMP:
+		prettyCmd = "pre-dump"
+	case lxc.MIGRATE_DUMP:
+		prettyCmd = "dump"
+	case lxc.MIGRATE_RESTORE:
+		prettyCmd = "restore"
+	default:
+		prettyCmd = "unknown"
+		shared.Log.Warn("unknown migrate call", log.Ctx{"cmd": cmd})
+	}
+
+	var migrateErr error
+
+	/* For restore, we need an extra fork so that we daemonize monitor
+	 * instead of having it be a child of LXD, so let's hijack the command
+	 * here and do the extra fork.
+	 */
+	if cmd == lxc.MIGRATE_RESTORE {
+		// Run the shared start
+		_, err := c.startCommon()
+		if err != nil {
+			return err
+		}
+
+		/*
+		 * For unprivileged containers we need to shift the
+		 * perms on the images images so that they can be
+		 * opened by the process after it is in its user
+		 * namespace.
+		 */
+		if !c.IsPrivileged() {
+			if err := c.IdmapSet().ShiftRootfs(stateDir); err != nil {
+				return err
+			}
+		}
+
+		configPath := filepath.Join(c.LogPath(), "lxc.conf")
+
+		var out []byte
+		out, migrateErr = exec.Command(
+			execPath,
+			"forkmigrate",
+			c.name,
+			c.daemon.lxcpath,
+			configPath,
+			stateDir).CombinedOutput()
+
+		if string(out) != "" {
+			for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
+				shared.Debugf("forkmigrate: %s", line)
+			}
+		}
+
+		if migrateErr != nil && !c.IsRunning() {
+			migrateErr = fmt.Errorf(
+				"Error calling 'lxd forkmigrate %s %s %s %s': err='%v' out='%v'",
+				c.name,
+				c.daemon.lxcpath,
+				filepath.Join(c.LogPath(), "lxc.conf"),
+				stateDir,
+				err,
+				string(out))
+		}
+
+	} else {
+		err := c.initLXC()
+		if err != nil {
+			return err
+		}
+
+		opts := lxc.MigrateOptions{
+			Stop:            stop,
+			Directory:       stateDir,
+			Verbose:         true,
+		}
+
+		migrateErr = c.c.Migrate(cmd, opts)
+	}
+
+	collectErr := collectCRIULogFile(c, stateDir, function, prettyCmd)
+	if collectErr != nil {
+		shared.Log.Error("Error collecting checkpoint log file", log.Ctx{"err": collectErr})
+	}
+
+	if migrateErr != nil {
+		log, err2 := getCRIULogErrors(stateDir, prettyCmd)
+		if err2 == nil {
+			migrateErr = fmt.Errorf("%s %s failed\n%s", function, prettyCmd, log)
+		}
 	}
 
-	return c.c.Migrate(cmd, opts)
+	return migrateErr
 }
 
 func (c *containerLXC) TemplateApply(trigger string) error {
diff --git a/lxd/migrate.go b/lxd/migrate.go
index 627e6d0..5528aa1 100644
--- a/lxd/migrate.go
+++ b/lxd/migrate.go
@@ -6,18 +6,13 @@
 package main
 
 import (
-	"bufio"
 	"fmt"
 	"io/ioutil"
 	"net/http"
 	"net/url"
 	"os"
-	"os/exec"
-	"path"
-	"path/filepath"
 	"strings"
 	"sync"
-	"time"
 
 	"github.com/golang/protobuf/proto"
 	"github.com/gorilla/websocket"
@@ -71,15 +66,6 @@ func (c *migrationFields) send(m proto.Message) error {
 	return shared.WriteAll(w, data)
 }
 
-func findCriu(host string) error {
-	_, err := exec.LookPath("criu")
-	if err != nil {
-		return fmt.Errorf("CRIU is required for live migration but its binary couldn't be found on the %s server. Is it installed in LXD's path?", host)
-	}
-
-	return nil
-}
-
 func (c *migrationFields) recv(m proto.Message) error {
 	mt, r, err := c.controlConn.NextReader()
 	if err != nil {
@@ -158,32 +144,6 @@ func (c *migrationFields) controlChannel() <-chan MigrationControl {
 	return ch
 }
 
-func CollectCRIULogFile(c container, imagesDir string, function string, method string) error {
-	t := time.Now().Format(time.RFC3339)
-	newPath := shared.LogPath(c.Name(), fmt.Sprintf("%s_%s_%s.log", function, method, t))
-	return shared.FileCopy(filepath.Join(imagesDir, fmt.Sprintf("%s.log", method)), newPath)
-}
-
-func GetCRIULogErrors(imagesDir string, method string) (string, error) {
-	f, err := os.Open(path.Join(imagesDir, fmt.Sprintf("%s.log", method)))
-	if err != nil {
-		return "", err
-	}
-
-	defer f.Close()
-
-	scanner := bufio.NewScanner(f)
-	ret := []string{}
-	for scanner.Scan() {
-		line := scanner.Text()
-		if strings.Contains(line, "Error") {
-			ret = append(ret, scanner.Text())
-		}
-	}
-
-	return strings.Join(ret, "\n"), nil
-}
-
 type migrationSourceWs struct {
 	migrationFields
 
@@ -368,24 +328,8 @@ func (s *migrationSourceWs) Do(op *operation) error {
 		}
 		defer os.RemoveAll(checkpointDir)
 
-		err = s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, true)
-
-		if err2 := CollectCRIULogFile(s.container, checkpointDir, "migration", "dump"); err2 != nil {
-			shared.Debugf("Error collecting checkpoint log file %s", err)
-		}
-
+		err = s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true)
 		if err != nil {
-			driver.Cleanup()
-			log, err2 := GetCRIULogErrors(checkpointDir, "dump")
-
-			/* couldn't find the CRIU log file which means we
-			 * didn't even get that far; give back the liblxc
-			 * error. */
-			if err2 != nil {
-				log = err.Error()
-			}
-
-			err = fmt.Errorf("checkpoint failed:\n%s", log)
 			s.sendControl(err)
 			return err
 		}
@@ -601,36 +545,12 @@ func (c *migrationSink) do() error {
 				return
 			}
 
-			defer func() {
-				err := CollectCRIULogFile(c.container, imagesDir, "migration", "restore")
-				/*
-				 * If the checkpoint fails, we won't have any log to collect,
-				 * so don't warn about that.
-				 */
-				if err != nil && !os.IsNotExist(err) {
-					shared.Debugf("Error collectiong migration log file %s", err)
-				}
-
-				os.RemoveAll(imagesDir)
-			}()
+			defer os.RemoveAll(imagesDir)
 
 			if err := RsyncRecv(shared.AddSlash(imagesDir), c.criuConn); err != nil {
 				restore <- err
 				return
 			}
-
-			/*
-			 * For unprivileged containers we need to shift the
-			 * perms on the images images so that they can be
-			 * opened by the process after it is in its user
-			 * namespace.
-			 */
-			if !c.container.IsPrivileged() {
-				if err := c.container.IdmapSet().ShiftRootfs(imagesDir); err != nil {
-					restore <- err
-					return
-				}
-			}
 		}
 
 		err := <-fsTransfer
@@ -640,15 +560,8 @@ func (c *migrationSink) do() error {
 		}
 
 		if c.live {
-			err := c.container.StartFromMigration(imagesDir)
+			err = c.container.Migrate(lxc.MIGRATE_RESTORE, imagesDir, "migration", false)
 			if err != nil {
-				log, err2 := GetCRIULogErrors(imagesDir, "restore")
-				/* restore failed before CRIU was invoked, give
-				 * back the liblxc error */
-				if err2 != nil {
-					log = err.Error()
-				}
-				err = fmt.Errorf("restore failed:\n%s", log)
 				restore <- err
 				return
 			}


More information about the lxc-devel mailing list