[lxc-devel] [lxd/stable-2.0] Gracefully cancel tasks on daemon shutdown

freeekanayaka on Github lxc-bot at linuxcontainers.org
Fri Nov 24 07:44:23 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 690 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20171124/055f1870/attachment.bin>
-------------- next part --------------
From d8253c306a84b495b8490b47c2a1087a6b10813b Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Mon, 23 Oct 2017 08:56:43 +0000
Subject: [PATCH] Gracefully cancel tasks on daemon shutdown

The various periodic tasks that the Daemon spawns have been changed so
they do their best to gracefully terminate as soon as possible when
the given context gets cancelled (which happens upn daemon
shutdown). Due to some of underlying APIs not supporting cancellation
yet, this should be considered just a starting point.

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/container_instance_types.go | 37 +++++++++++++++++++++++++++++++++----
 lxd/daemon.go                   |  6 ++++--
 lxd/images.go                   | 37 +++++++++++++++++++++++++++++--------
 lxd/logging.go                  | 31 ++++++++++++++++++++++++++-----
 lxd/util/http.go                | 16 ++++++++++++----
 5 files changed, 104 insertions(+), 23 deletions(-)

diff --git a/lxd/container_instance_types.go b/lxd/container_instance_types.go
index 7a2e3cf5e..a2e1c6963 100644
--- a/lxd/container_instance_types.go
+++ b/lxd/container_instance_types.go
@@ -64,13 +64,32 @@ func instanceLoadCache() error {
 }
 
 func instanceRefreshTypesTask(d *Daemon) (task.Func, task.Schedule) {
-	f := func(context.Context) {
-		instanceRefreshTypes(d)
+	// This is basically a check of whether we're on Go >= 1.8 and
+	// http.Request has cancellation support. If that's the case, it will
+	// be used internally by instanceRefreshTypes to terminate gracefully,
+	// otherwise we'll wrap instanceRefreshTypes in a goroutine and force
+	// returning in case the context expires.
+	_, hasCancellationSupport := interface{}(&http.Request{}).(util.ContextAwareRequest)
+	f := func(ctx context.Context) {
+		if hasCancellationSupport {
+			instanceRefreshTypes(ctx, d)
+		} else {
+			ch := make(chan struct{})
+			go func() {
+				instanceRefreshTypes(ctx, d)
+				ch <- struct{}{}
+			}()
+			select {
+			case <-ctx.Done():
+				return
+			case <-ch:
+			}
+		}
 	}
 	return f, task.Daily()
 }
 
-func instanceRefreshTypes(d *Daemon) error {
+func instanceRefreshTypes(ctx context.Context, d *Daemon) error {
 	logger.Info("Updating instance types")
 
 	// Attempt to download the new definitions
@@ -89,10 +108,18 @@ func instanceRefreshTypes(d *Daemon) error {
 
 		httpReq.Header.Set("User-Agent", version.UserAgent)
 
+		cancelableRequest, ok := interface{}(httpReq).(util.ContextAwareRequest)
+		if ok {
+			httpReq = cancelableRequest.WithContext(ctx)
+		}
+
 		resp, err := httpClient.Do(httpReq)
 		if err != nil {
 			return err
 		}
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
 		defer resp.Body.Close()
 
 		if resp.StatusCode != http.StatusOK {
@@ -121,7 +148,9 @@ func instanceRefreshTypes(d *Daemon) error {
 	sources := map[string]string{}
 	err := downloadParse(".yaml", &sources)
 	if err != nil {
-		logger.Warnf("Failed to update instance types: %v", err)
+		if err != ctx.Err() {
+			logger.Warnf("Failed to update instance types: %v", err)
+		}
 		return err
 	}
 
diff --git a/lxd/daemon.go b/lxd/daemon.go
index fa2a65d27..04bbef9e0 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -378,7 +378,9 @@ func (d *Daemon) Ready() error {
 	// FIXME: There's no hard reason for which we should not run tasks in
 	//        mock mode. However it requires that we tweak the tasks so
 	//        they exit gracefully without blocking (something we should
-	//        do anyways) and they don't hit the internet or similar.
+	//        do anyways) and they don't hit the internet or similar. Support
+	//        for proper cancellation is something that has been started but
+	//        has not been fully completed.
 	if !d.os.MockMode {
 		d.tasks.Start()
 	}
@@ -430,7 +432,7 @@ func (d *Daemon) Stop() error {
 		trackError(d.endpoints.Down())
 	}
 
-	trackError(d.tasks.Stop(5 * time.Second)) // Give tasks at most five seconds to cleanup.
+	trackError(d.tasks.Stop(time.Second)) // Give tasks at second to cleanup.
 
 	if d.db != nil {
 		if n, err := d.numRunningContainers(); err != nil || n == 0 {
diff --git a/lxd/images.go b/lxd/images.go
index 38180fda0..0525a7792 100644
--- a/lxd/images.go
+++ b/lxd/images.go
@@ -831,8 +831,8 @@ func imagesGet(d *Daemon, r *http.Request) Response {
 var imagesCmd = Command{name: "images", post: imagesPost, untrustedGet: true, get: imagesGet}
 
 func autoUpdateImagesTask(d *Daemon) (task.Func, task.Schedule) {
-	f := func(context.Context) {
-		autoUpdateImages(d)
+	f := func(ctx context.Context) {
+		autoUpdateImages(ctx, d)
 	}
 	schedule := func() (time.Duration, error) {
 		interval := daemonConfig["images.auto_update_interval"].GetInt64()
@@ -841,7 +841,7 @@ func autoUpdateImagesTask(d *Daemon) (task.Func, task.Schedule) {
 	return f, schedule
 }
 
-func autoUpdateImages(d *Daemon) {
+func autoUpdateImages(ctx context.Context, d *Daemon) {
 	logger.Infof("Updating images")
 
 	images, err := d.db.ImagesGet(false)
@@ -861,7 +861,19 @@ func autoUpdateImages(d *Daemon) {
 			continue
 		}
 
-		autoUpdateImage(d, nil, id, info)
+		// FIXME: since our APIs around image downloading don't support
+		//        cancelling, we run the function in a different
+		//        goroutine and simply abort when the context expires.
+		ch := make(chan struct{})
+		go func() {
+			autoUpdateImage(d, nil, id, info)
+			ch <- struct{}{}
+		}()
+		select {
+		case <-ctx.Done():
+			return
+		case <-ch:
+		}
 	}
 
 	logger.Infof("Done updating images")
@@ -938,18 +950,18 @@ func autoUpdateImage(d *Daemon, op *operation, id int, info *api.Image) error {
 }
 
 func pruneExpiredImagesTask(d *Daemon) (task.Func, task.Schedule) {
-	f := func(context.Context) {
-		pruneExpiredImages(d)
+	f := func(ctx context.Context) {
+		pruneExpiredImages(ctx, d)
 	}
 
 	// Skip the first run, and instead run an initial pruning synchronously
 	// before we start updating images later on in the start up process.
-	pruneExpiredImages(d)
+	pruneExpiredImages(context.Background(), d)
 
 	return f, task.Daily(task.SkipFirst)
 }
 
-func pruneExpiredImages(d *Daemon) {
+func pruneExpiredImages(ctx context.Context, d *Daemon) {
 	// Get the list of expired images.
 	expiry := daemonConfig["images.remote_cache_expiry"].GetInt64()
 
@@ -967,6 +979,15 @@ func pruneExpiredImages(d *Daemon) {
 
 	// Delete them
 	for _, fp := range images {
+		// At each iteration we check if we got cancelled in the
+		// meantime. It is safe to abort here since anything not
+		// expired now will be expired at the next run.
+		select {
+		case <-ctx.Done():
+			return
+		default:
+		}
+
 		if err := doDeleteImage(d, fp); err != nil {
 			logger.Error("Error deleting image", log.Ctx{"err": err, "fp": fp})
 		}
diff --git a/lxd/logging.go b/lxd/logging.go
index 01b0e9c47..6587149cd 100644
--- a/lxd/logging.go
+++ b/lxd/logging.go
@@ -18,9 +18,9 @@ import (
 // This task function expires logs when executed. It's started by the Daemon
 // and will run once every 24h.
 func expireLogsTask(state *state.State) (task.Func, task.Schedule) {
-	f := func(context.Context) {
+	f := func(ctx context.Context) {
 		logger.Infof("Expiring log files")
-		err := expireLogs(state)
+		err := expireLogs(ctx, state)
 		if err != nil {
 			logger.Error("Failed to expire logs", log.Ctx{"err": err})
 		}
@@ -29,13 +29,27 @@ func expireLogsTask(state *state.State) (task.Func, task.Schedule) {
 	return f, task.Daily()
 }
 
-func expireLogs(state *state.State) error {
+func expireLogs(ctx context.Context, state *state.State) error {
 	entries, err := ioutil.ReadDir(state.OS.LogDir)
 	if err != nil {
 		return err
 	}
 
-	result, err := state.DB.ContainersList(db.CTypeRegular)
+	// FIXME: our DB APIs don't yet support cancellation, se we need to run
+	//        them in a goroutine and abort this task if the context gets
+	//        cancelled.
+	var containers []string
+	ch := make(chan struct{})
+	go func() {
+		containers, err = state.DB.ContainersList(db.CTypeRegular)
+		ch <- struct{}{}
+	}()
+	select {
+	case <-ctx.Done():
+		return nil // Context expired
+	case <-ch:
+	}
+
 	if err != nil {
 		return err
 	}
@@ -58,8 +72,15 @@ func expireLogs(state *state.State) error {
 	}
 
 	for _, entry := range entries {
+		// At each iteration we check if we got cancelled in the meantime.
+		select {
+		case <-ctx.Done():
+			return nil
+		default:
+		}
+
 		// Check if the container still exists
-		if shared.StringInSlice(entry.Name(), result) {
+		if shared.StringInSlice(entry.Name(), containers) {
 			// Remove any log file which wasn't modified in the past 48 hours
 			logs, err := ioutil.ReadDir(shared.LogPath(entry.Name()))
 			if err != nil {
diff --git a/lxd/util/http.go b/lxd/util/http.go
index 6ee0d5b00..934b55358 100644
--- a/lxd/util/http.go
+++ b/lxd/util/http.go
@@ -17,6 +17,7 @@ import (
 	"time"
 
 	log "github.com/lxc/lxd/shared/log15"
+	"golang.org/x/net/context"
 
 	"github.com/lxc/lxd/shared"
 	"github.com/lxc/lxd/shared/logger"
@@ -102,7 +103,7 @@ func IsTrustedClient(r *http.Request, trustedCerts []x509.Certificate) bool {
 	}
 
 	for i := range r.TLS.PeerCertificates {
-		if checkTrustState(*r.TLS.PeerCertificates[i], trustedCerts) {
+		if CheckTrustState(*r.TLS.PeerCertificates[i], trustedCerts) {
 			return true
 		}
 	}
@@ -110,9 +111,16 @@ func IsTrustedClient(r *http.Request, trustedCerts []x509.Certificate) bool {
 	return false
 }
 
-// Check whether the given client certificate is trusted (i.e. it has a valid
-// time span and it belongs to the given list of trusted certificates).
-func checkTrustState(cert x509.Certificate, trustedCerts []x509.Certificate) bool {
+// ContextAwareRequest is an interface implemented by http.Request starting
+// from Go 1.8. It supports graceful cancellation using a context.
+type ContextAwareRequest interface {
+	WithContext(ctx context.Context) *http.Request
+}
+
+// CheckTrustState checks whether the given client certificate is trusted
+// (i.e. it has a valid time span and it belongs to the given list of trusted
+// certificates).
+func CheckTrustState(cert x509.Certificate, trustedCerts []x509.Certificate) bool {
 	// Extra validity check (should have been caught by TLS stack)
 	if time.Now().Before(cert.NotBefore) || time.Now().After(cert.NotAfter) {
 		return false


More information about the lxc-devel mailing list