[lxc-devel] [lxd/master] Make the interaction between lxd daemon and waitready non-blocking
freeekanayaka on Github
lxc-bot at linuxcontainers.org
Thu Apr 19 09:19:18 UTC 2018
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 581 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180419/13addde2/attachment.bin>
-------------- next part --------------
From 17e643e8ba430c9270a096a00d6e345bfe5b7b14 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 19 Apr 2018 08:33:27 +0000
Subject: [PATCH 1/2] Make the interaction betwean lxd daemon and waitready
non-blocking
I think this does not change observable behavior from users, but might help us
debug problems in case waitready hangs. YMMV
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/api_internal.go | 6 +++++-
lxd/daemon.go | 7 ++++++-
lxd/main_waitready.go | 20 +++++++++++++++++++-
lxd/response.go | 1 +
4 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/lxd/api_internal.go b/lxd/api_internal.go
index 6f0443240..7a8542e4f 100644
--- a/lxd/api_internal.go
+++ b/lxd/api_internal.go
@@ -36,7 +36,11 @@ var apiInternal = []Command{
}
func internalWaitReady(d *Daemon, r *http.Request) Response {
- <-d.readyChan
+ select {
+ case <-d.readyChan:
+ default:
+ return Unavailable
+ }
return EmptySyncResponse
}
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 2b76ab2ec..5e6a6a757 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -237,7 +237,12 @@ func (d *Daemon) createCmd(restAPI *mux.Router, version string, c Command) {
// Block public API requests until we're done with basic
// initialization tasks, such setting up the cluster database.
- <-d.setupChan
+ select {
+ case <-d.setupChan:
+ default:
+ Unavailable.Render(w)
+ return
+ }
untrustedOk := (r.Method == "GET" && c.untrustedGet) || (r.Method == "POST" && c.untrustedPost)
err := d.checkTrustedClient(r)
diff --git a/lxd/main_waitready.go b/lxd/main_waitready.go
index 78bb62e82..1ad4ff8be 100644
--- a/lxd/main_waitready.go
+++ b/lxd/main_waitready.go
@@ -7,6 +7,7 @@ import (
"github.com/spf13/cobra"
"github.com/lxc/lxd/client"
+ "github.com/lxc/lxd/shared/logger"
)
type cmdWaitready struct {
@@ -35,15 +36,32 @@ func (c *cmdWaitready) Command() *cobra.Command {
func (c *cmdWaitready) Run(cmd *cobra.Command, args []string) error {
finger := make(chan error, 1)
go func() {
- for {
+ for i := 0; ; i++ {
+ // Log initial attempts at debug level, but use warn
+ // level after the 10'th attempt (about 5 seconds). Then
+ // after the 30'th attempt (about 15 seconds), log only
+ // only one attempt every 10 attempts (about 5 seconds),
+ // to avoid being too verbose.
+ log := logger.Debugf
+ if i > 10 {
+ log = logger.Warnf
+ if i > 30 && !((i % 10) == 0) {
+ log = func(string, ...interface{}) {}
+ }
+ }
+
+ log("Connecting to LXD daemon (attempt %d)", i)
d, err := lxd.ConnectLXDUnix("", nil)
if err != nil {
+ log("Failed connecting to LXD daemon (attempt %d): %v", i, err)
time.Sleep(500 * time.Millisecond)
continue
}
+ log("Checking if LXD daemon is ready (attempt %d)", i)
_, _, err = d.RawQuery("GET", "/internal/ready", nil, "")
if err != nil {
+ log("Failed checking if LXD daemon is ready (attempt %d): %v", i, err)
time.Sleep(500 * time.Millisecond)
continue
}
diff --git a/lxd/response.go b/lxd/response.go
index cb5cb59dc..bc975d344 100644
--- a/lxd/response.go
+++ b/lxd/response.go
@@ -477,6 +477,7 @@ var NotImplemented = &errorResponse{http.StatusNotImplemented, "not implemented"
var NotFound = &errorResponse{http.StatusNotFound, "not found"}
var Forbidden = &errorResponse{http.StatusForbidden, "not authorized"}
var Conflict = &errorResponse{http.StatusConflict, "already exists"}
+var Unavailable = &errorResponse{http.StatusServiceUnavailable, "unavailable"}
func BadRequest(err error) Response {
return &errorResponse{http.StatusBadRequest, err.Error()}
From 402d911e65ad439d924b0023a9a357c894b03dab Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 19 Apr 2018 08:53:51 +0000
Subject: [PATCH 2/2] Increase logging during startup
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/daemon.go | 5 +++++
lxd/db/db.go | 15 +++++++++++++--
lxd/patches.go | 2 +-
3 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 5e6a6a757..e868c855e 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -468,6 +468,7 @@ func (d *Daemon) init() error {
/* Open the cluster database */
for {
+ logger.Info("Initializing global database")
d.cluster, err = db.OpenCluster("db.bin", d.gateway.Dialer(), address)
if err == nil {
break
@@ -534,6 +535,7 @@ func (d *Daemon) init() error {
}
/* Read the storage pools */
+ logger.Infof("Initializing storage pools")
err = SetupStorageDriver(d.State(), false)
if err != nil {
return err
@@ -546,6 +548,7 @@ func (d *Daemon) init() error {
}
/* Setup the networks */
+ logger.Infof("Initializing networks")
err = networkStartup(d.State())
if err != nil {
return err
@@ -579,6 +582,7 @@ func (d *Daemon) init() error {
return err
}
+ logger.Infof("Loading configuration")
err = d.cluster.Transaction(func(tx *db.ClusterTx) error {
config, err := cluster.ConfigLoad(tx)
if err != nil {
@@ -864,6 +868,7 @@ func (d *Daemon) setupMAASController(server string, key string, machine string)
// Create a database connection and perform any updates needed.
func initializeDbObject(d *Daemon) (*db.Dump, error) {
+ logger.Info("Initializing local database")
// Rename the old database name if needed.
if shared.PathExists(d.os.LegacyLocalDatabasePath()) {
if shared.PathExists(d.os.LocalDatabasePath()) {
diff --git a/lxd/db/db.go b/lxd/db/db.go
index 3c540c322..37a652cfd 100644
--- a/lxd/db/db.go
+++ b/lxd/db/db.go
@@ -164,16 +164,27 @@ func OpenCluster(name string, dialer grpcsql.Dialer, address string) (*Cluster,
// Test that the cluster database is operational. We wait up to 10
// minutes, in case there's no quorum of nodes online yet.
timeout := time.After(10 * time.Minute)
- for {
+ for i := 0; ; i++ {
+ // Log initial attempts at debug level, but use warn
+ // level after the 10'th attempt (about 10 seconds).
+ log := logger.Debugf
+ if i > 5 {
+ log = logger.Warnf
+ }
+
err = db.Ping()
if err == nil {
break
}
+
cause := errors.Cause(err)
if cause != context.DeadlineExceeded {
return nil, err
}
- time.Sleep(10 * time.Second)
+
+ log("Failed connecting to global database (attempt %d): %v", i, err)
+
+ time.Sleep(2 * time.Second)
select {
case <-timeout:
return nil, fmt.Errorf("failed to connect to cluster database")
diff --git a/lxd/patches.go b/lxd/patches.go
index 911e0e9aa..227d35a51 100644
--- a/lxd/patches.go
+++ b/lxd/patches.go
@@ -62,7 +62,7 @@ type patch struct {
}
func (p *patch) apply(d *Daemon) error {
- logger.Debugf("Applying patch: %s", p.name)
+ logger.Infof("Applying patch: %s", p.name)
err := p.run(p.name, d)
if err != nil {
More information about the lxc-devel
mailing list