[lxc-devel] [lxd/master] Increase go-dqlite client timeout when not-clustered
freeekanayaka on Github
lxc-bot at linuxcontainers.org
Fri Nov 2 11:38:07 UTC 2018
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 492 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20181102/83abd638/attachment.bin>
-------------- next part --------------
From 0b7d1bc213a235c7c53bcd2b81e823511a84eb63 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Fri, 2 Nov 2018 12:20:23 +0100
Subject: [PATCH] Increase go-dqlite client timeout when not-clustered
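This is a workaround for #5234: the go-dqlite client timeout is raised to one
minute while the node is not clustered, and lowered to 5 seconds when the node
bootstraps or joins a cluster. Later down the road we'll want to implement a
proper fix in dqlite, as described in the issue.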
Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
lxd/api_cluster.go | 24 +++++++++++++++++++++++-
lxd/daemon.go | 20 +++++++++++++++-----
lxd/db/db.go | 6 ++++++
3 files changed, 44 insertions(+), 6 deletions(-)
diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go
index ee50c721f8..68b5be91c9 100644
--- a/lxd/api_cluster.go
+++ b/lxd/api_cluster.go
@@ -8,6 +8,7 @@ import (
"os"
"path/filepath"
"strconv"
+ "time"
"github.com/CanonicalLtd/go-dqlite"
"github.com/gorilla/mux"
@@ -184,7 +185,20 @@ func clusterPut(d *Daemon, r *http.Request) Response {
func clusterPutBootstrap(d *Daemon, req api.ClusterPut) Response {
run := func(op *operation) error {
- return cluster.Bootstrap(d.State(), d.gateway, req.ServerName)
+ // The default timeout when not clustered is one minute. Lower
+ // it now, since from here on we'll likely have to make requests
+ // over the network.
+ //
+ // FIXME: this is a workaround for #5234.
+ d.cluster.SetDefaultTimeout(5 * time.Second)
+
+ err := cluster.Bootstrap(d.State(), d.gateway, req.ServerName)
+ if err != nil {
+ d.cluster.SetDefaultTimeout(time.Minute)
+ return err
+ }
+
+ return nil
}
resources := map[string][]string{}
resources["cluster"] = []string{}
@@ -348,8 +362,16 @@ func clusterPutJoin(d *Daemon, req api.ClusterPut) Response {
nodes[i].Address = node.Address
}
+ // The default timeout when not clustered is one minute. Lower
+ // it now, since from here on we'll likely have to make requests
+ // over the network.
+ //
+ // FIXME: this is a workaround for #5234.
+ d.cluster.SetDefaultTimeout(5 * time.Second)
+
err = cluster.Join(d.State(), d.gateway, cert, req.ServerName, nodes)
if err != nil {
+ d.cluster.SetDefaultTimeout(time.Minute)
return err
}
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 88c9c74db2..baa4ef2e00 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -529,17 +529,32 @@ func (d *Daemon) init() error {
return err
}
+ clustered, err := cluster.Enabled(d.db)
+ if err != nil {
+ return err
+ }
+
/* Open the cluster database */
for {
logger.Info("Initializing global database")
dir := filepath.Join(d.os.VarDir, "database")
store := d.gateway.ServerStore()
+
+ contextTimeout := 5 * time.Second
+ if !clustered {
+ // FIXME: this is a workaround for #5234. We set a very
+ // high timeout when we're not clustered, since there's
+ // actually no networking involved.
+ contextTimeout = time.Minute
+ }
+
d.cluster, err = db.OpenCluster(
"db.bin", store, address, dir,
d.config.DqliteSetupTimeout,
dqlite.WithDialFunc(d.gateway.DialFunc()),
dqlite.WithContext(d.gateway.Context()),
dqlite.WithConnectionTimeout(10*time.Second),
+ dqlite.WithContextTimeout(contextTimeout),
dqlite.WithLogFunc(cluster.DqliteLog),
)
if err == nil {
@@ -597,11 +612,6 @@ func (d *Daemon) init() error {
}
// Setup the user-agent
- clustered, err := cluster.Enabled(d.db)
- if err != nil {
- return err
- }
-
if clustered {
version.UserAgentFeatures([]string{"cluster"})
}
diff --git a/lxd/db/db.go b/lxd/db/db.go
index 02ab33ff11..b5718e8349 100644
--- a/lxd/db/db.go
+++ b/lxd/db/db.go
@@ -279,6 +279,12 @@ func ForLocalInspectionWithPreparedStmts(db *sql.DB) (*Cluster, error) {
return c, nil
}
+// SetDefaultTimeout sets the default go-dqlite driver timeout.
+func (c *Cluster) SetDefaultTimeout(timeout time.Duration) {
+ driver := c.db.Driver().(*dqlite.Driver)
+ driver.SetContextTimeout(timeout)
+}
+
// Transaction creates a new ClusterTx object and transactionally executes the
// cluster database interactions invoked by the given function. If the function
// returns no error, all database changes are committed to the cluster database
More information about the lxc-devel
mailing list