[lxc-devel] [lxd/master] Make the interaction between lxd daemon and waitready non-blocking

freeekanayaka on Github lxc-bot at linuxcontainers.org
Thu Apr 19 09:19:18 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 581 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180419/13addde2/attachment.bin>
-------------- next part --------------
From 17e643e8ba430c9270a096a00d6e345bfe5b7b14 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 19 Apr 2018 08:33:27 +0000
Subject: [PATCH 1/2] Make the interaction betwean lxd daemon and waitready
 non-blocking

I think this does not change observable behavior from users, but might help us
debug problems in case waitready hangs. YMMV

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/api_internal.go   |  6 +++++-
 lxd/daemon.go         |  7 ++++++-
 lxd/main_waitready.go | 20 +++++++++++++++++++-
 lxd/response.go       |  1 +
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/lxd/api_internal.go b/lxd/api_internal.go
index 6f0443240..7a8542e4f 100644
--- a/lxd/api_internal.go
+++ b/lxd/api_internal.go
@@ -36,7 +36,11 @@ var apiInternal = []Command{
 }
 
 func internalWaitReady(d *Daemon, r *http.Request) Response {
-	<-d.readyChan
+	select {
+	case <-d.readyChan:
+	default:
+		return Unavailable
+	}
 
 	return EmptySyncResponse
 }
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 2b76ab2ec..5e6a6a757 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -237,7 +237,12 @@ func (d *Daemon) createCmd(restAPI *mux.Router, version string, c Command) {
 
 		// Block public API requests until we're done with basic
 		// initialization tasks, such setting up the cluster database.
-		<-d.setupChan
+		select {
+		case <-d.setupChan:
+		default:
+			Unavailable.Render(w)
+			return
+		}
 
 		untrustedOk := (r.Method == "GET" && c.untrustedGet) || (r.Method == "POST" && c.untrustedPost)
 		err := d.checkTrustedClient(r)
diff --git a/lxd/main_waitready.go b/lxd/main_waitready.go
index 78bb62e82..1ad4ff8be 100644
--- a/lxd/main_waitready.go
+++ b/lxd/main_waitready.go
@@ -7,6 +7,7 @@ import (
 	"github.com/spf13/cobra"
 
 	"github.com/lxc/lxd/client"
+	"github.com/lxc/lxd/shared/logger"
 )
 
 type cmdWaitready struct {
@@ -35,15 +36,32 @@ func (c *cmdWaitready) Command() *cobra.Command {
 func (c *cmdWaitready) Run(cmd *cobra.Command, args []string) error {
 	finger := make(chan error, 1)
 	go func() {
-		for {
+		for i := 0; ; i++ {
+			// Log initial attempts at debug level, but use warn
+			// level after the 10'th attempt (about 5 seconds). Then
+			// after the 30'th attempt (about 15 seconds), log only
+			// only one attempt every 10 attempts (about 5 seconds),
+			// to avoid being too verbose.
+			log := logger.Debugf
+			if i > 10 {
+				log = logger.Warnf
+				if i > 30 && !((i % 10) == 0) {
+					log = func(string, ...interface{}) {}
+				}
+			}
+
+			log("Connecting to LXD daemon (attempt %d)", i)
 			d, err := lxd.ConnectLXDUnix("", nil)
 			if err != nil {
+				log("Failed connecting to LXD daemon (attempt %d): %v", i, err)
 				time.Sleep(500 * time.Millisecond)
 				continue
 			}
 
+			log("Checking if LXD daemon is ready (attempt %d)", i)
 			_, _, err = d.RawQuery("GET", "/internal/ready", nil, "")
 			if err != nil {
+				log("Failed checking if LXD daemon is ready (attempt %d): %v", i, err)
 				time.Sleep(500 * time.Millisecond)
 				continue
 			}
diff --git a/lxd/response.go b/lxd/response.go
index cb5cb59dc..bc975d344 100644
--- a/lxd/response.go
+++ b/lxd/response.go
@@ -477,6 +477,7 @@ var NotImplemented = &errorResponse{http.StatusNotImplemented, "not implemented"
 var NotFound = &errorResponse{http.StatusNotFound, "not found"}
 var Forbidden = &errorResponse{http.StatusForbidden, "not authorized"}
 var Conflict = &errorResponse{http.StatusConflict, "already exists"}
+var Unavailable = &errorResponse{http.StatusServiceUnavailable, "unavailable"}
 
 func BadRequest(err error) Response {
 	return &errorResponse{http.StatusBadRequest, err.Error()}

From 402d911e65ad439d924b0023a9a357c894b03dab Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 19 Apr 2018 08:53:51 +0000
Subject: [PATCH 2/2] Increase logging during startup

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/daemon.go  |  5 +++++
 lxd/db/db.go   | 15 +++++++++++++--
 lxd/patches.go |  2 +-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/lxd/daemon.go b/lxd/daemon.go
index 5e6a6a757..e868c855e 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -468,6 +468,7 @@ func (d *Daemon) init() error {
 
 	/* Open the cluster database */
 	for {
+		logger.Info("Initializing global database")
 		d.cluster, err = db.OpenCluster("db.bin", d.gateway.Dialer(), address)
 		if err == nil {
 			break
@@ -534,6 +535,7 @@ func (d *Daemon) init() error {
 	}
 
 	/* Read the storage pools */
+	logger.Infof("Initializing storage pools")
 	err = SetupStorageDriver(d.State(), false)
 	if err != nil {
 		return err
@@ -546,6 +548,7 @@ func (d *Daemon) init() error {
 	}
 
 	/* Setup the networks */
+	logger.Infof("Initializing networks")
 	err = networkStartup(d.State())
 	if err != nil {
 		return err
@@ -579,6 +582,7 @@ func (d *Daemon) init() error {
 		return err
 	}
 
+	logger.Infof("Loading configuration")
 	err = d.cluster.Transaction(func(tx *db.ClusterTx) error {
 		config, err := cluster.ConfigLoad(tx)
 		if err != nil {
@@ -864,6 +868,7 @@ func (d *Daemon) setupMAASController(server string, key string, machine string)
 
 // Create a database connection and perform any updates needed.
 func initializeDbObject(d *Daemon) (*db.Dump, error) {
+	logger.Info("Initializing local database")
 	// Rename the old database name if needed.
 	if shared.PathExists(d.os.LegacyLocalDatabasePath()) {
 		if shared.PathExists(d.os.LocalDatabasePath()) {
diff --git a/lxd/db/db.go b/lxd/db/db.go
index 3c540c322..37a652cfd 100644
--- a/lxd/db/db.go
+++ b/lxd/db/db.go
@@ -164,16 +164,27 @@ func OpenCluster(name string, dialer grpcsql.Dialer, address string) (*Cluster,
 	// Test that the cluster database is operational. We wait up to 10
 	// minutes, in case there's no quorum of nodes online yet.
 	timeout := time.After(10 * time.Minute)
-	for {
+	for i := 0; ; i++ {
+		// Log initial attempts at debug level, but use warn
+		// level after the 10'th attempt (about 10 seconds).
+		log := logger.Debugf
+		if i > 5 {
+			log = logger.Warnf
+		}
+
 		err = db.Ping()
 		if err == nil {
 			break
 		}
+
 		cause := errors.Cause(err)
 		if cause != context.DeadlineExceeded {
 			return nil, err
 		}
-		time.Sleep(10 * time.Second)
+
+		log("Failed connecting to global database (attempt %d): %v", i, err)
+
+		time.Sleep(2 * time.Second)
 		select {
 		case <-timeout:
 			return nil, fmt.Errorf("failed to connect to cluster database")
diff --git a/lxd/patches.go b/lxd/patches.go
index 911e0e9aa..227d35a51 100644
--- a/lxd/patches.go
+++ b/lxd/patches.go
@@ -62,7 +62,7 @@ type patch struct {
 }
 
 func (p *patch) apply(d *Daemon) error {
-	logger.Debugf("Applying patch: %s", p.name)
+	logger.Infof("Applying patch: %s", p.name)
 
 	err := p.run(p.name, d)
 	if err != nil {


More information about the lxc-devel mailing list