[lxc-devel] [lxd/master] Increase go-dqlite client timeout when not-clustered

freeekanayaka on Github lxc-bot at linuxcontainers.org
Fri Nov 2 11:38:07 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 492 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20181102/83abd638/attachment.bin>
-------------- next part --------------
From 0b7d1bc213a235c7c53bcd2b81e823511a84eb63 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Fri, 2 Nov 2018 12:20:23 +0100
Subject: [PATCH] Increase go-dqlite client timeout when not-clustered

This is a workaround for #5234. Later down the road we'll want to implement a
proper fix in dqlite, as described in the issue.

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/api_cluster.go | 24 +++++++++++++++++++++++-
 lxd/daemon.go      | 20 +++++++++++++++-----
 lxd/db/db.go       |  6 ++++++
 3 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go
index ee50c721f8..68b5be91c9 100644
--- a/lxd/api_cluster.go
+++ b/lxd/api_cluster.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"path/filepath"
 	"strconv"
+	"time"
 
 	"github.com/CanonicalLtd/go-dqlite"
 	"github.com/gorilla/mux"
@@ -184,7 +185,20 @@ func clusterPut(d *Daemon, r *http.Request) Response {
 
 func clusterPutBootstrap(d *Daemon, req api.ClusterPut) Response {
 	run := func(op *operation) error {
-		return cluster.Bootstrap(d.State(), d.gateway, req.ServerName)
+		// The default timeout when non-clustered is one minute; lower
+		// it now that we'll likely have to make requests over the
+		// network.
+		//
+		// FIXME: this is a workaround for #5234.
+		d.cluster.SetDefaultTimeout(5 * time.Second)
+
+		err := cluster.Bootstrap(d.State(), d.gateway, req.ServerName)
+		if err != nil {
+			d.cluster.SetDefaultTimeout(time.Minute)
+			return err
+		}
+
+		return nil
 	}
 	resources := map[string][]string{}
 	resources["cluster"] = []string{}
@@ -348,8 +362,16 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) Response {
 			nodes[i].Address = node.Address
 		}
 
+		// The default timeout when non-clustered is one minute; lower
+		// it now that we'll likely have to make requests over the
+		// network.
+		//
+		// FIXME: this is a workaround for #5234.
+		d.cluster.SetDefaultTimeout(5 * time.Second)
+
 		err = cluster.Join(d.State(), d.gateway, cert, req.ServerName, nodes)
 		if err != nil {
+			d.cluster.SetDefaultTimeout(time.Minute)
 			return err
 		}
 
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 88c9c74db2..baa4ef2e00 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -529,17 +529,32 @@ func (d *Daemon) init() error {
 		return err
 	}
 
+	clustered, err := cluster.Enabled(d.db)
+	if err != nil {
+		return err
+	}
+
 	/* Open the cluster database */
 	for {
 		logger.Info("Initializing global database")
 		dir := filepath.Join(d.os.VarDir, "database")
 		store := d.gateway.ServerStore()
+
+		contextTimeout := 5 * time.Second
+		if !clustered {
+			// FIXME: this is a workaround for #5234. We set a very
+			// high timeout when we're not clustered, since there's
+			// actually no networking involved.
+			contextTimeout = time.Minute
+		}
+
 		d.cluster, err = db.OpenCluster(
 			"db.bin", store, address, dir,
 			d.config.DqliteSetupTimeout,
 			dqlite.WithDialFunc(d.gateway.DialFunc()),
 			dqlite.WithContext(d.gateway.Context()),
 			dqlite.WithConnectionTimeout(10*time.Second),
+			dqlite.WithContextTimeout(contextTimeout),
 			dqlite.WithLogFunc(cluster.DqliteLog),
 		)
 		if err == nil {
@@ -597,11 +612,6 @@ func (d *Daemon) init() error {
 	}
 
 	// Setup the user-agent
-	clustered, err := cluster.Enabled(d.db)
-	if err != nil {
-		return err
-	}
-
 	if clustered {
 		version.UserAgentFeatures([]string{"cluster"})
 	}
diff --git a/lxd/db/db.go b/lxd/db/db.go
index 02ab33ff11..b5718e8349 100644
--- a/lxd/db/db.go
+++ b/lxd/db/db.go
@@ -279,6 +279,12 @@ func ForLocalInspectionWithPreparedStmts(db *sql.DB) (*Cluster, error) {
 	return c, nil
 }
 
+// SetDefaultTimeout sets the default context timeout of the go-dqlite driver.
+func (c *Cluster) SetDefaultTimeout(timeout time.Duration) {
+	driver := c.db.Driver().(*dqlite.Driver)
+	driver.SetContextTimeout(timeout)
+}
+
 // Transaction creates a new ClusterTx object and transactionally executes the
 // cluster database interactions invoked by the given function. If the function
 // returns no error, all database changes are committed to the cluster database


More information about the lxc-devel mailing list