[lxc-devel] [lxd/master] LXD: Races

tomponline on Github lxc-bot at linuxcontainers.org
Thu Aug 6 16:13:28 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 390 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200806/29e3efc1/attachment.bin>
-------------- next part --------------
From 35640baf87ad7ef9c566b939a6e03798c2f00c27 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 6 Aug 2020 17:10:54 +0100
Subject: [PATCH 1/4] Makefile: Correctly builds lxd-p2c and lxd-agent in debug
 and nocache targets

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Makefile b/Makefile
index 18cfb4eff9..c4f605a6b2 100644
--- a/Makefile
+++ b/Makefile
@@ -134,6 +134,8 @@ endif
 
 	go get -t -v -d ./...
 	CC=$(CC) go install -v -tags "$(TAG_SQLITE3) logdebug" $(DEBUG) ./...
+	CGO_ENABLED=0 go install -v -tags "netgo,logdebug" ./lxd-p2c
+	CGO_ENABLED=0 go install -v -tags "agent,netgo,logdebug" ./lxd-agent
 	@echo "LXD built successfully"
 
 .PHONY: nocache
@@ -145,6 +147,8 @@ endif
 
 	go get -t -v -d ./...
 	CC=$(CC) go install -a -v -tags "$(TAG_SQLITE3)" $(DEBUG) ./...
+	CGO_ENABLED=0 go install -a -v -tags netgo ./lxd-p2c
+	CGO_ENABLED=0 go install -a -v -tags agent,netgo ./lxd-agent
 	@echo "LXD built successfully"
 
 race:

From dccd1afa7c89c173d610afcc94375cb3a57788a0 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 6 Aug 2020 17:11:20 +0100
Subject: [PATCH 2/4] client/operations: Race fix

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 client/operations.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/client/operations.go b/client/operations.go
index b0bdf91be7..e99145098f 100644
--- a/client/operations.go
+++ b/client/operations.go
@@ -106,8 +106,9 @@ func (op *operation) Wait() error {
 	// Check if not done already
 	if op.StatusCode.IsFinal() {
 		if op.Err != "" {
+			err := op.Err
 			op.handlerLock.Unlock()
-			return fmt.Errorf(op.Err)
+			return fmt.Errorf(err)
 		}
 
 		op.handlerLock.Unlock()

From 1deea91f2518789f6d000dd377f4a4588d4de257 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 6 Aug 2020 17:11:52 +0100
Subject: [PATCH 3/4] lxd/db: Adds mutex to fix races

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/db/db.go | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/lxd/db/db.go b/lxd/db/db.go
index e1ef3a1f68..c0519e8e31 100644
--- a/lxd/db/db.go
+++ b/lxd/db/db.go
@@ -131,11 +131,12 @@ func (n *Node) Begin() (*sql.Tx, error) {
 
 // Cluster mediates access to LXD's data stored in the cluster dqlite database.
 type Cluster struct {
-	db      *sql.DB // Handle to the cluster dqlite database, gated behind gRPC SQL.
-	nodeID  int64   // Node ID of this LXD instance.
-	mu      sync.RWMutex
-	stmts   map[int]*sql.Stmt // Prepared statements by code.
-	closing bool              // True when daemon is shutting down, prevents retries
+	db        *sql.DB // Handle to the cluster dqlite database, gated behind gRPC SQL.
+	nodeID    int64   // Node ID of this LXD instance.
+	mu        sync.RWMutex
+	stmts     map[int]*sql.Stmt // Prepared statements by code.
+	closing   bool              // True when daemon is shutting down, prevents retries
+	clusterMu sync.Mutex
 }
 
 // OpenCluster creates a new Cluster object for interacting with the dqlite
@@ -327,7 +328,9 @@ func ForLocalInspectionWithPreparedStmts(db *sql.DB) (*Cluster, error) {
 // Kill should be called upon shutdown, it will prevent retrying failed
 // database queries.
 func (c *Cluster) Kill() {
+	c.clusterMu.Lock()
 	c.closing = true
+	c.clusterMu.Unlock()
 }
 
 // GetNodeID returns the current nodeID (0 if not set)
@@ -391,7 +394,11 @@ func (c *Cluster) transaction(f func(*ClusterTx) error) error {
 }
 
 func (c *Cluster) retry(f func() error) error {
-	if c.closing {
+	c.clusterMu.Lock()
+	closing := c.closing
+	c.clusterMu.Unlock()
+
+	if closing {
 		return f()
 	}
 	return query.Retry(f)

From c9b85e0bdf78c73676df49b532b56a5ad8d79c23 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 6 Aug 2020 17:12:13 +0100
Subject: [PATCH 4/4] lxd/operations: Fixes races

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/operations/operations.go | 41 ++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/lxd/operations/operations.go b/lxd/operations/operations.go
index 8b7208103f..7067bcebb1 100644
--- a/lxd/operations/operations.go
+++ b/lxd/operations/operations.go
@@ -186,7 +186,10 @@ func OperationCreate(s *state.State, project string, opClass operationClass, opT
 
 	logger.Debugf("New %s Operation: %s", op.class.String(), op.id)
 	_, md, _ := op.Render()
+
+	operationsLock.Lock()
 	op.sendEvent(md)
+	operationsLock.Unlock()
 
 	return &op, nil
 }
@@ -255,9 +258,12 @@ func (op *Operation) Run() (chan error, error) {
 				chanRun <- err
 
 				logger.Debugf("Failure for %s operation: %s: %s", op.class.String(), op.id, err)
-
 				_, md, _ := op.Render()
+
+				op.lock.Lock()
 				op.sendEvent(md)
+				op.lock.Unlock()
+
 				return
 			}
 
@@ -267,18 +273,24 @@ func (op *Operation) Run() (chan error, error) {
 			op.done()
 			chanRun <- nil
 
-			op.lock.Lock()
 			logger.Debugf("Success for %s operation: %s", op.class.String(), op.id)
 			_, md, _ := op.Render()
+
+			op.lock.Lock()
 			op.sendEvent(md)
 			op.lock.Unlock()
 		}(op, chanRun)
 	}
+
 	op.lock.Unlock()
 
 	logger.Debugf("Started %s operation: %s", op.class.String(), op.id)
 	_, md, _ := op.Render()
+
+	op.lock.Lock()
 	op.sendEvent(md)
+	op.lock.Unlock()
+
 	return chanRun, nil
 }
 
@@ -313,7 +325,11 @@ func (op *Operation) Cancel() (chan error, error) {
 
 				logger.Debugf("Failed to cancel %s Operation: %s: %s", op.class.String(), op.id, err)
 				_, md, _ := op.Render()
+
+				op.lock.Lock()
 				op.sendEvent(md)
+				op.lock.Unlock()
+
 				return
 			}
 
@@ -325,7 +341,11 @@ func (op *Operation) Cancel() (chan error, error) {
 
 			logger.Debugf("Cancelled %s Operation: %s", op.class.String(), op.id)
 			_, md, _ := op.Render()
+
+			op.lock.Lock()
 			op.sendEvent(md)
+			op.lock.Unlock()
+
 		}(op, oldStatus, chanCancel)
 	}
 
@@ -350,7 +370,10 @@ func (op *Operation) Cancel() (chan error, error) {
 
 	logger.Debugf("Cancelled %s Operation: %s", op.class.String(), op.id)
 	_, md, _ = op.Render()
+
+	op.lock.Lock()
 	op.sendEvent(md)
+	op.lock.Unlock()
 
 	return chanCancel, nil
 }
@@ -429,7 +452,8 @@ func (op *Operation) Render() (string, *api.Operation, error) {
 		return "", nil, err
 	}
 
-	return op.url, &api.Operation{
+	op.lock.Lock()
+	retOp := &api.Operation{
 		ID:          op.id,
 		Class:       op.class.String(),
 		Description: op.description,
@@ -442,7 +466,10 @@ func (op *Operation) Render() (string, *api.Operation, error) {
 		MayCancel:   op.mayCancel(),
 		Err:         op.err,
 		Location:    serverName,
-	}, nil
+	}
+	op.lock.Unlock()
+
+	return op.url, retOp, nil
 }
 
 // WaitFinal waits for the operation to be done. If timeout is -1, it will wait
@@ -495,7 +522,10 @@ func (op *Operation) UpdateResources(opResources map[string][]string) error {
 
 	logger.Debugf("Updated resources for %s Operation: %s", op.class.String(), op.id)
 	_, md, _ := op.Render()
+
+	op.lock.Lock()
 	op.sendEvent(md)
+	op.lock.Unlock()
 
 	return nil
 }
@@ -523,7 +553,10 @@ func (op *Operation) UpdateMetadata(opMetadata interface{}) error {
 
 	logger.Debugf("Updated metadata for %s Operation: %s", op.class.String(), op.id)
 	_, md, _ := op.Render()
+
+	op.lock.Lock()
 	op.sendEvent(md)
+	op.lock.Unlock()
 
 	return nil
 }


More information about the lxc-devel mailing list