[lxc-devel] [lxd/master] Fix more races
tych0 on Github
lxc-bot at linuxcontainers.org
Tue Oct 18 16:08:48 UTC 2016
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 322 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20161018/b3a4e656/attachment.bin>
-------------- next part --------------
From b24188b3749b67029dd64191b40f3defc3249227 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Tue, 18 Oct 2016 09:37:20 -0600
Subject: [PATCH 1/2] don't use the operation as a marker of success
This caused several problems similar to #2512. Instead, let's just use
another channel.
Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
lxd/migrate.go | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/lxd/migrate.go b/lxd/migrate.go
index b0b3d68..061f2a5 100644
--- a/lxd/migrate.go
+++ b/lxd/migrate.go
@@ -379,6 +379,8 @@ func (s *migrationSourceWs) Do(migrateOp *operation) error {
return abort(err)
}
+ restoreSuccess := make(chan bool, 1)
+ dumpSuccess := make(chan error, 1)
if s.live {
if header.Criu == nil {
return abort(fmt.Errorf("Got no CRIU socket type for live migration"))
@@ -420,13 +422,9 @@ func (s *migrationSourceWs) Do(migrateOp *operation) error {
nil,
nil,
func(op *operation) error {
- _, err := migrateOp.WaitFinal(-1)
- if err != nil {
- return err
- }
-
- if migrateOp.status != shared.Success {
- return fmt.Errorf("restore failed: %s", op.status.String())
+ result := <-restoreSuccess
+ if !result {
+ return fmt.Errorf("restore failed, failing CRIU")
}
return nil
},
@@ -468,15 +466,14 @@ func (s *migrationSourceWs) Do(migrateOp *operation) error {
return abort(err)
}
- migrateDone := make(chan error, 1)
go func() {
- migrateDone <- s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true, true)
+ dumpSuccess <- s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true, true)
os.RemoveAll(checkpointDir)
}()
select {
/* the checkpoint failed, let's just abort */
- case err = <-migrateDone:
+ case err = <-dumpSuccess:
return abort(err)
/* the dump finished, let's continue on to the restore */
case <-dumpDone:
@@ -513,6 +510,14 @@ func (s *migrationSourceWs) Do(migrateOp *operation) error {
return err
}
+ if s.live {
+ restoreSuccess <- *msg.Success
+ err := <-dumpSuccess
+ if err != nil {
+ shared.LogErrorf("dump failed after successful restore?: %q", err)
+ }
+ }
+
if !*msg.Success {
return fmt.Errorf(*msg.Message)
}
From 78cf0f9a94615161a37653311aeef186635cfc0b Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Tue, 18 Oct 2016 09:39:34 -0600
Subject: [PATCH 2/2] copy: wait on the source operation too
The source might need to do some cleanup (e.g. wait for the container to
actually be killed by CRIU in the case of a live migration), so let's wait
for it too.
Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
lxc/copy.go | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lxc/copy.go b/lxc/copy.go
index 1f2eea4..ca69b62 100644
--- a/lxc/copy.go
+++ b/lxc/copy.go
@@ -215,6 +215,10 @@ func (c *copyCmd) copyContainer(config *lxd.Config, sourceResource string, destR
continue
}
+ if err := source.WaitForSuccess(sourceWSResponse.Operation); err != nil {
+ return err
+ }
+
// If push mode is implemented then MigrateFrom will return a
// non-waitable operation. So this needs to be conditionalized
// on pull mode.
More information about the lxc-devel
mailing list