[lxc-devel] [lxd/master] Retry harder upon transient database errors

freeekanayaka on Github lxc-bot at linuxcontainers.org
Thu Oct 8 14:54:56 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 347 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20201008/6aedde7a/attachment.bin>
-------------- next part --------------
From 6da737988a5fc96724446109a32c10e82bc4154b Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 8 Oct 2020 16:16:36 +0200
Subject: [PATCH 1/5] db: Retry transient errors for longer

We used to retry for a little more than a second, which is not enough in some
cases.

We now retry for about 25 seconds and with a higher attempt frequency.

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/db/query/retry.go | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/lxd/db/query/retry.go b/lxd/db/query/retry.go
index a67f3dc5ac..8c62026843 100644
--- a/lxd/db/query/retry.go
+++ b/lxd/db/query/retry.go
@@ -5,12 +5,16 @@ import (
 	"strings"
 	"time"
 
+	"github.com/Rican7/retry/jitter"
+	"github.com/canonical/go-dqlite/driver"
 	"github.com/mattn/go-sqlite3"
 	"github.com/pkg/errors"
 
 	"github.com/lxc/lxd/shared/logger"
 )
 
+const maxRetries = 250
+
 // Retry wraps a function that interacts with the database, and retries it in
 // case a transient error is hit.
 //
@@ -18,7 +22,7 @@ import (
 func Retry(f func() error) error {
 	// TODO: the retry loop should be configurable.
 	var err error
-	for i := 0; i < 5; i++ {
+	for i := 0; i < maxRetries; i++ {
 		err = f()
 		if err != nil {
 			// No point in re-trying or logging a no-row error.
@@ -29,8 +33,12 @@ func Retry(f func() error) error {
 			// Process actual errors.
 			logger.Debugf("Database error: %#v", err)
 			if IsRetriableError(err) {
+				if i == maxRetries {
+					logger.Warnf("Give up retring database error: %v", err)
+					break
+				}
 				logger.Debugf("Retry failed db interaction (%v)", err)
-				time.Sleep(250 * time.Millisecond)
+				time.Sleep(jitter.Deviation(nil, 0.8)(100 * time.Millisecond))
 				continue
 			}
 		}

From 0978355a1ba22a6552ab8cb13bce99808dfcb014 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 8 Oct 2020 16:18:10 +0200
Subject: [PATCH 2/5] db: Always retry driver.ErrBusy, regardless of the error
 message

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/db/query/retry.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lxd/db/query/retry.go b/lxd/db/query/retry.go
index 8c62026843..b8a3fb1116 100644
--- a/lxd/db/query/retry.go
+++ b/lxd/db/query/retry.go
@@ -56,6 +56,10 @@ func IsRetriableError(err error) bool {
 		return false
 	}
 
+	if err, ok := err.(driver.Error); ok && err.Code == driver.ErrBusy {
+		return true
+	}
+
 	if err == sqlite3.ErrLocked || err == sqlite3.ErrBusy {
 		return true
 	}

From 4ba063876dcec1ea8659ab74bd66dbec58fb4a33 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 8 Oct 2020 16:23:23 +0200
Subject: [PATCH 3/5] db: Retry failed rollbacks if they are due to transient
 errors

This avoids leaving the connection in a bad state, where a transaction is
already in progress and another one can't be started.

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/db/query/transaction.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lxd/db/query/transaction.go b/lxd/db/query/transaction.go
index 0cde98614d..77bac41403 100644
--- a/lxd/db/query/transaction.go
+++ b/lxd/db/query/transaction.go
@@ -30,7 +30,7 @@ func Transaction(db *sql.DB, f func(*sql.Tx) error) error {
 // succeeds the given error is returned, otherwise a new error that wraps it
 // gets generated and returned.
 func rollback(tx *sql.Tx, reason error) error {
-	err := tx.Rollback()
+	err := Retry(tx.Rollback)
 	if err != nil {
 		logger.Warnf("Failed to rollback transaction after error (%v): %v", reason, err)
 	}

From f68785b88259f34af84c4360f06c33d69cbc4281 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 8 Oct 2020 16:49:56 +0200
Subject: [PATCH 4/5] db: Explicitly rollback leftover transactions when a new
 one can't be started

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/db/query/transaction.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lxd/db/query/transaction.go b/lxd/db/query/transaction.go
index 77bac41403..901177b0e8 100644
--- a/lxd/db/query/transaction.go
+++ b/lxd/db/query/transaction.go
@@ -2,6 +2,7 @@ package query
 
 import (
 	"database/sql"
+	"strings"
 
 	"github.com/lxc/lxd/shared/logger"
 	"github.com/pkg/errors"
@@ -11,6 +12,11 @@ import (
 func Transaction(db *sql.DB, f func(*sql.Tx) error) error {
 	tx, err := db.Begin()
 	if err != nil {
+		// If there is a leftover transaction let's try to rollback,
+		// we'll then retry again.
+		if strings.Contains(err.Error(), "cannot start a transaction within a transaction") {
+			db.Exec("ROLLBACK")
+		}
 		return errors.Wrap(err, "failed to begin transaction")
 	}
 

From 54639c41eab9532e618be4b6d59a835c3ee6aee0 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanayaka at canonical.com>
Date: Thu, 8 Oct 2020 16:50:35 +0200
Subject: [PATCH 5/5] db: Retry to begin a new transaction after an explicit
 rollback attempt

Signed-off-by: Free Ekanayaka <free.ekanayaka at canonical.com>
---
 lxd/db/query/retry.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lxd/db/query/retry.go b/lxd/db/query/retry.go
index b8a3fb1116..0d8a28c032 100644
--- a/lxd/db/query/retry.go
+++ b/lxd/db/query/retry.go
@@ -68,6 +68,10 @@ func IsRetriableError(err error) bool {
 		return true
 	}
 
+	if strings.Contains(err.Error(), "cannot start a transaction within a transaction") {
+		return true
+	}
+
 	if strings.Contains(err.Error(), "bad connection") {
 		return true
 	}


More information about the lxc-devel mailing list