[lxc-devel] [lxd/master] Bugfixes

stgraber on Github lxc-bot at linuxcontainers.org
Sun Nov 20 04:26:28 UTC 2016


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20161120/2027912c/attachment.bin>
-------------- next part --------------
From c3fab854c51d377d2619f2be24e5304feddd8466 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 19 Nov 2016 18:43:31 -0500
Subject: [PATCH 1/7] test: Better fix LXD_DEBUG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous change didn't really work due to internal function calls.
Instead, lets just focus on silencing things once we hit a failure.

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 test/main.sh | 67 ++++++++----------------------------------------------------
 1 file changed, 8 insertions(+), 59 deletions(-)

diff --git a/test/main.sh b/test/main.sh
index 7aeceb6..a29fd98 100755
--- a/test/main.sh
+++ b/test/main.sh
@@ -51,11 +51,7 @@ if [ -z "${LXD_BACKEND:-}" ]; then
 fi
 
 spawn_lxd() {
-  # Don't trace internal functions
   set +x
-  OLD_DEBUG=${LXD_DEBUG:-}
-  LXD_DEBUG=""
-
   # LXD_DIR is local here because since $(lxc) is actually a function, it
   # overwrites the environment and we would lose LXD_DIR's value otherwise.
 
@@ -95,47 +91,27 @@ spawn_lxd() {
 
   echo "==> Setting trust password"
   LXD_DIR="${lxddir}" lxc config set core.trust_password foo
+  if [ -n "${LXD_DEBUG:-}" ]; then
+    set -x
+  fi
 
   echo "==> Setting up networking"
   LXD_DIR="${lxddir}" lxc network attach-profile lxdbr0 default eth0
 
   echo "==> Configuring storage backend"
   "$LXD_BACKEND"_configure "${lxddir}"
-
-  # Trace everything again
-  if [ -n "${OLD_DEBUG:-}" ]; then
-    LXD_DEBUG="${OLD_DEBUG}"
-    set -x
-  fi
 }
 
 lxc() {
-  # Don't trace internal functions
-  set +x
-  OLD_DEBUG=${LXD_DEBUG:-}
-  LXD_DEBUG=""
-
-  # Call lxc_remote
   LXC_LOCAL=1
   lxc_remote "$@"
   RET=$?
   unset LXC_LOCAL
-
-  # Trace everything again
-  if [ -n "${OLD_DEBUG:-}" ]; then
-    LXD_DEBUG="${OLD_DEBUG}"
-    set -x
-  fi
-
   return ${RET}
 }
 
 lxc_remote() {
-  # Don't trace internal functions
   set +x
-  OLD_DEBUG=${LXD_DEBUG:-}
-  LXD_DEBUG=""
-
   injected=0
   cmd=$(which lxc)
 
@@ -156,16 +132,10 @@ lxc_remote() {
   if [ "${injected}" = "0" ]; then
     cmd="${cmd} ${DEBUG-}"
   fi
-  eval "${cmd}"
-  RET=$?
-
-  # Trace everything again
-  if [ -n "${OLD_DEBUG:-}" ]; then
-    LXD_DEBUG="${OLD_DEBUG}"
+  if [ -n "${LXD_DEBUG:-}" ]; then
     set -x
   fi
-
-  return ${RET}
+  eval "${cmd}"
 }
 
 gen_cert() {
@@ -229,11 +199,6 @@ check_empty_table() {
 }
 
 kill_lxd() {
-  # Don't trace internal functions
-  set +x
-  OLD_DEBUG=${LXD_DEBUG:-}
-  LXD_DEBUG=""
-
   # LXD_DIR is local here because since $(lxc) is actually a function, it
   # overwrites the environment and we would lose LXD_DIR's value otherwise.
 
@@ -325,22 +290,12 @@ kill_lxd() {
 
   # Remove the daemon from the list
   sed "\|^${daemon_dir}|d" -i "${TEST_DIR}/daemons"
-
-  # Trace everything again
-  if [ -n "${OLD_DEBUG:-}" ]; then
-    LXD_DEBUG="${OLD_DEBUG}"
-    set -x
-  fi
 }
 
 cleanup() {
-  # Don't trace internal functions
-  set +x
-  OLD_DEBUG=${LXD_DEBUG:-}
-  LXD_DEBUG=""
-
-  # Allow for failures during cleanup
-  set +e
+  # Allow for failures and stop tracing everything
+  set +ex
+  LXD_DEBUG=
 
   # Allow for inspection
   if [ -n "${LXD_INSPECT:-}" ]; then
@@ -382,12 +337,6 @@ cleanup() {
   if [ "${TEST_RESULT}" != "success" ]; then
     echo "failed test: ${TEST_CURRENT}"
   fi
-
-  # Trace everything again
-  if [ -n "${OLD_DEBUG:-}" ]; then
-    LXD_DEBUG="${OLD_DEBUG}"
-    set -x
-  fi
 }
 
 wipe() {

From ebd0d78086e024f19209e18792e0d2850589d672 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 19 Nov 2016 18:54:31 -0500
Subject: [PATCH 2/7] test: Don't depend on main.sh for filemanip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise if main.sh is owned by a host uid which cannot be mapped into
the container, the transfer will fail.

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 test/suites/filemanip.sh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/test/suites/filemanip.sh b/test/suites/filemanip.sh
index 171d3b4..f135e40 100644
--- a/test/suites/filemanip.sh
+++ b/test/suites/filemanip.sh
@@ -4,12 +4,14 @@ test_filemanip() {
   ensure_import_testimage
   ensure_has_localhost_remote "${LXD_ADDR}"
 
+  echo "test" > "${TEST_DIR}"/filemanip
+
   lxc launch testimage filemanip
   lxc exec filemanip -- ln -s /tmp/ /tmp/outside
-  lxc file push main.sh filemanip/tmp/outside/
+  lxc file push "${TEST_DIR}"/filemanip filemanip/tmp/outside/
 
-  [ ! -f /tmp/main.sh ]
-  lxc exec filemanip -- ls /tmp/main.sh
+  [ ! -f /tmp/filemanip ]
+  lxc exec filemanip -- ls /tmp/filemanip
 
   # missing files should return 404
   err=$(my_curl -o /dev/null -w "%{http_code}" -X GET "https://${LXD_ADDR}/1.0/containers/filemanip/files?path=/tmp/foo")

From 439c3c73fcb72b444dc32eff06857b558906d0e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 19 Nov 2016 19:09:52 -0500
Subject: [PATCH 3/7] Clarify container delete failure error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 0c7b0f1..3b9390f 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2466,7 +2466,7 @@ func (c *containerLXC) Delete() error {
 		// Delete the container from disk
 		if shared.PathExists(c.Path()) {
 			if err := c.storage.ContainerDelete(c); err != nil {
-				shared.LogError("Failed deleting container", ctxMap)
+				shared.LogError("Failed deleting container storage", ctxMap)
 				return err
 			}
 		}
@@ -2474,7 +2474,7 @@ func (c *containerLXC) Delete() error {
 
 	// Remove the database record
 	if err := dbContainerRemove(c.daemon.db, c.Name()); err != nil {
-		shared.LogError("Failed deleting container", ctxMap)
+		shared.LogError("Failed deleting container entry", ctxMap)
 		return err
 	}
 

From 5bb98fef9b744fc5ec3d3fd1649a3bd03abf3ddc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 19 Nov 2016 19:10:03 -0500
Subject: [PATCH 4/7] Don't double delete ephemeral containers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_state.go | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/lxd/container_state.go b/lxd/container_state.go
index c01cefc..1a4ca8c 100644
--- a/lxd/container_state.go
+++ b/lxd/container_state.go
@@ -80,10 +80,6 @@ func containerStatePut(d *Daemon, r *http.Request) Response {
 					return err
 				}
 
-				if c.IsEphemeral() {
-					c.Delete()
-				}
-
 				return nil
 			}
 		} else {
@@ -100,10 +96,6 @@ func containerStatePut(d *Daemon, r *http.Request) Response {
 					return err
 				}
 
-				if c.IsEphemeral() {
-					c.Delete()
-				}
-
 				return nil
 			}
 		}

From ffdf03f7ad5d364d21dca5d15b7305b99ce13112 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 19 Nov 2016 22:49:37 -0500
Subject: [PATCH 5/7] Better handle concurent stop/shutdown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #2612

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 3b9390f..0e996b5 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -310,7 +310,7 @@ type containerLXC struct {
 func (c *containerLXC) createOperation(action string, timeout int) (*lxcContainerOperation, error) {
 	op, _ := c.getOperation("")
 	if op != nil {
-		return nil, fmt.Errorf("Container is already running a %s operation", op.action)
+		return nil, fmt.Errorf("Container is busy running a %s operation", op.action)
 	}
 
 	lxcContainerOperationsLock.Lock()
@@ -2001,13 +2001,13 @@ func (c *containerLXC) Stop(stateful bool) error {
 	}
 
 	err = op.Wait()
-	if err != nil {
+	if err != nil && c.IsRunning() {
 		shared.LogError("Failed stopping container", ctxMap)
 		return err
 	}
 
 	shared.LogInfo("Stopped container", ctxMap)
-	return err
+	return nil
 }
 
 func (c *containerLXC) Shutdown(timeout time.Duration) error {
@@ -2020,7 +2020,7 @@ func (c *containerLXC) Shutdown(timeout time.Duration) error {
 	}
 
 	ctxMap = log.Ctx{"name": c.name,
-		"action":    op.action,
+		"action":    "shutdown",
 		"created":   c.creationDate,
 		"ephemeral": c.ephemeral,
 		"used":      c.lastUsedDate,
@@ -2043,14 +2043,14 @@ func (c *containerLXC) Shutdown(timeout time.Duration) error {
 	}
 
 	err = op.Wait()
-	if err != nil {
+	if err != nil && c.IsRunning() {
 		shared.LogError("Failed shutting down container", ctxMap)
 		return err
 	}
 
 	shared.LogInfo("Shut down container", ctxMap)
 
-	return err
+	return nil
 }
 
 func (c *containerLXC) OnStop(target string) error {

From 8b0f9a3f9f0a1e01b3e589187df2c965c6aac2b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 19 Nov 2016 22:57:32 -0500
Subject: [PATCH 6/7] Improve container error handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 0e996b5..8736e40 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1924,6 +1924,12 @@ func (c *containerLXC) OnStart() error {
 // Stop functions
 func (c *containerLXC) Stop(stateful bool) error {
 	var ctxMap log.Ctx
+
+	// Check that we're not already stopped
+	if !c.IsRunning() {
+		return fmt.Errorf("The container is already stopped")
+	}
+
 	// Setup a new operation
 	op, err := c.createOperation("stop", 30)
 	if err != nil {
@@ -2013,6 +2019,11 @@ func (c *containerLXC) Stop(stateful bool) error {
 func (c *containerLXC) Shutdown(timeout time.Duration) error {
 	var ctxMap log.Ctx
 
+	// Check that we're not already stopped
+	if !c.IsRunning() {
+		return fmt.Errorf("The container is already stopped")
+	}
+
 	// Setup a new operation
 	op, err := c.createOperation("shutdown", 30)
 	if err != nil {
@@ -2159,6 +2170,16 @@ func (c *containerLXC) Freeze() error {
 		"ephemeral": c.ephemeral,
 		"used":      c.lastUsedDate}
 
+	// Check that we're not already frozen
+	if c.IsFrozen() {
+		return fmt.Errorf("The container is already frozen")
+	}
+
+	// Check that we're running
+	if !c.IsRunning() {
+		return fmt.Errorf("The container isn't running")
+	}
+
 	shared.LogInfo("Freezing container", ctxMap)
 
 	// Load the go-lxc struct
@@ -2187,6 +2208,16 @@ func (c *containerLXC) Unfreeze() error {
 		"ephemeral": c.ephemeral,
 		"used":      c.lastUsedDate}
 
+	// Check that we're frozen
+	if !c.IsFrozen() {
+		return fmt.Errorf("The container is already running")
+	}
+
+	// Check that we're running
+	if !c.IsRunning() {
+		return fmt.Errorf("The container isn't running")
+	}
+
 	shared.LogInfo("Unfreezing container", ctxMap)
 
 	// Load the go-lxc struct

From f8dc2029cef89c2b8b753b2b8a742a7ea44a2021 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 19 Nov 2016 23:21:18 -0500
Subject: [PATCH 7/7] Improve container locking mechanism
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

 - Removes adjustable timeout (we were using 30s everywhere)
 - Merge stop and shutdown operations
 - Allows for re-usable operations
 - Make Shutdown() re-use any existing stop operation which was
   triggered by a prior call to Shutdown()
 - Make Stop() re-use any existng stop operations which was triggered by
   a prior call to Shutdown()

This effectively still prevents Start() from kicking in before a
container is fully shutdown and it also still prevents concurrent calls
to Stop(), but it now allows for multiple calls to Shutdown() to work
(just sending the shutdown signal multiple times) and it also allows for
a forceful shutdown to work after a clean shutdown attempt.

Closes #2612

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/container_lxc.go | 59 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 19 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 8736e40..0a51860 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -31,29 +31,45 @@ import (
 
 // Operation locking
 type lxcContainerOperation struct {
-	action   string
-	chanDone chan error
-	err      error
-	id       int
-	timeout  int
+	action    string
+	chanDone  chan error
+	chanReset chan bool
+	err       error
+	id        int
+	reusable  bool
 }
 
-func (op *lxcContainerOperation) Create(id int, action string, timeout int) *lxcContainerOperation {
+func (op *lxcContainerOperation) Create(id int, action string, reusable bool) *lxcContainerOperation {
 	op.id = id
 	op.action = action
-	op.timeout = timeout
+	op.reusable = reusable
 	op.chanDone = make(chan error, 0)
+	op.chanReset = make(chan bool, 0)
 
-	if timeout > 1 {
-		go func(op *lxcContainerOperation) {
-			time.Sleep(time.Second * time.Duration(op.timeout))
-			op.Done(fmt.Errorf("Container %s operation timed out after %d seconds", op.action, op.timeout))
-		}(op)
-	}
+	go func(op *lxcContainerOperation) {
+		for {
+			select {
+			case <-op.chanReset:
+				continue
+			case <-time.After(time.Second * 30):
+				op.Done(fmt.Errorf("Container %s operation timed out after 30 seconds", op.action))
+				return
+			}
+		}
+	}(op)
 
 	return op
 }
 
+func (op *lxcContainerOperation) Reset() error {
+	if !op.reusable {
+		return fmt.Errorf("Can't reset a non-reusable operation")
+	}
+
+	op.chanReset <- true
+	return nil
+}
+
 func (op *lxcContainerOperation) Wait() error {
 	<-op.chanDone
 
@@ -307,9 +323,14 @@ type containerLXC struct {
 	storage  storage
 }
 
-func (c *containerLXC) createOperation(action string, timeout int) (*lxcContainerOperation, error) {
+func (c *containerLXC) createOperation(action string, reusable bool, reuse bool) (*lxcContainerOperation, error) {
 	op, _ := c.getOperation("")
 	if op != nil {
+		if reuse && op.reusable {
+			op.Reset()
+			return op, nil
+		}
+
 		return nil, fmt.Errorf("Container is busy running a %s operation", op.action)
 	}
 
@@ -317,7 +338,7 @@ func (c *containerLXC) createOperation(action string, timeout int) (*lxcContaine
 	defer lxcContainerOperationsLock.Unlock()
 
 	op = &lxcContainerOperation{}
-	op.Create(c.id, action, timeout)
+	op.Create(c.id, action, reusable)
 	lxcContainerOperations[c.id] = op
 
 	return lxcContainerOperations[c.id], nil
@@ -1712,7 +1733,7 @@ func (c *containerLXC) Start(stateful bool) error {
 	var ctxMap log.Ctx
 
 	// Setup a new operation
-	op, err := c.createOperation("start", 30)
+	op, err := c.createOperation("start", false, false)
 	if err != nil {
 		return err
 	}
@@ -1931,7 +1952,7 @@ func (c *containerLXC) Stop(stateful bool) error {
 	}
 
 	// Setup a new operation
-	op, err := c.createOperation("stop", 30)
+	op, err := c.createOperation("stop", false, true)
 	if err != nil {
 		return err
 	}
@@ -2025,7 +2046,7 @@ func (c *containerLXC) Shutdown(timeout time.Duration) error {
 	}
 
 	// Setup a new operation
-	op, err := c.createOperation("shutdown", 30)
+	op, err := c.createOperation("stop", true, true)
 	if err != nil {
 		return err
 	}
@@ -2073,7 +2094,7 @@ func (c *containerLXC) OnStop(target string) error {
 
 	// Get operation
 	op, _ := c.getOperation("")
-	if op != nil && !shared.StringInSlice(op.action, []string{"stop", "shutdown"}) {
+	if op != nil && op.action != "stop" {
 		return fmt.Errorf("Container is already running a %s operation", op.action)
 	}
 


More information about the lxc-devel mailing list