[lxc-devel] [lxd/master] LXD: Races
tomponline on Github
lxc-bot at linuxcontainers.org
Thu Aug 6 16:13:28 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 390 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200806/29e3efc1/attachment.bin>
-------------- next part --------------
From 35640baf87ad7ef9c566b939a6e03798c2f00c27 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 6 Aug 2020 17:10:54 +0100
Subject: [PATCH 1/4] Makefile: Correctly builds lxd-p2c and lxd-agent in debug
and nocache targets
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
Makefile | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/Makefile b/Makefile
index 18cfb4eff9..c4f605a6b2 100644
--- a/Makefile
+++ b/Makefile
@@ -134,6 +134,8 @@ endif
go get -t -v -d ./...
CC=$(CC) go install -v -tags "$(TAG_SQLITE3) logdebug" $(DEBUG) ./...
+ CGO_ENABLED=0 go install -v -tags "netgo,logdebug" ./lxd-p2c
+ CGO_ENABLED=0 go install -v -tags "agent,netgo,logdebug" ./lxd-agent
@echo "LXD built successfully"
.PHONY: nocache
@@ -145,6 +147,8 @@ endif
go get -t -v -d ./...
CC=$(CC) go install -a -v -tags "$(TAG_SQLITE3)" $(DEBUG) ./...
+ CGO_ENABLED=0 go install -a -v -tags netgo ./lxd-p2c
+ CGO_ENABLED=0 go install -a -v -tags agent,netgo ./lxd-agent
@echo "LXD built successfully"
race:
From dccd1afa7c89c173d610afcc94375cb3a57788a0 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 6 Aug 2020 17:11:20 +0100
Subject: [PATCH 2/4] client/operations: Race fix
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
client/operations.go | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/client/operations.go b/client/operations.go
index b0bdf91be7..e99145098f 100644
--- a/client/operations.go
+++ b/client/operations.go
@@ -106,8 +106,9 @@ func (op *operation) Wait() error {
// Check if not done already
if op.StatusCode.IsFinal() {
if op.Err != "" {
+ err := op.Err
op.handlerLock.Unlock()
- return fmt.Errorf(op.Err)
+ return fmt.Errorf(err)
}
op.handlerLock.Unlock()
From 1deea91f2518789f6d000dd377f4a4588d4de257 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 6 Aug 2020 17:11:52 +0100
Subject: [PATCH 3/4] lxd/db: Adds mutex to fix races
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/db/db.go | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/lxd/db/db.go b/lxd/db/db.go
index e1ef3a1f68..c0519e8e31 100644
--- a/lxd/db/db.go
+++ b/lxd/db/db.go
@@ -131,11 +131,12 @@ func (n *Node) Begin() (*sql.Tx, error) {
// Cluster mediates access to LXD's data stored in the cluster dqlite database.
type Cluster struct {
- db *sql.DB // Handle to the cluster dqlite database, gated behind gRPC SQL.
- nodeID int64 // Node ID of this LXD instance.
- mu sync.RWMutex
- stmts map[int]*sql.Stmt // Prepared statements by code.
- closing bool // True when daemon is shutting down, prevents retries
+ db *sql.DB // Handle to the cluster dqlite database, gated behind gRPC SQL.
+ nodeID int64 // Node ID of this LXD instance.
+ mu sync.RWMutex
+ stmts map[int]*sql.Stmt // Prepared statements by code.
+ closing bool // True when daemon is shutting down, prevents retries
+ clusterMu sync.Mutex
}
// OpenCluster creates a new Cluster object for interacting with the dqlite
@@ -327,7 +328,9 @@ func ForLocalInspectionWithPreparedStmts(db *sql.DB) (*Cluster, error) {
// Kill should be called upon shutdown, it will prevent retrying failed
// database queries.
func (c *Cluster) Kill() {
+ c.clusterMu.Lock()
c.closing = true
+ c.clusterMu.Unlock()
}
// GetNodeID returns the current nodeID (0 if not set)
@@ -391,7 +394,11 @@ func (c *Cluster) transaction(f func(*ClusterTx) error) error {
}
func (c *Cluster) retry(f func() error) error {
- if c.closing {
+ c.clusterMu.Lock()
+ closing := c.closing
+ c.clusterMu.Unlock()
+
+ if closing {
return f()
}
return query.Retry(f)
From c9b85e0bdf78c73676df49b532b56a5ad8d79c23 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 6 Aug 2020 17:12:13 +0100
Subject: [PATCH 4/4] lxd/operations: Fixes races
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/operations/operations.go | 41 ++++++++++++++++++++++++++++++++----
1 file changed, 37 insertions(+), 4 deletions(-)
diff --git a/lxd/operations/operations.go b/lxd/operations/operations.go
index 8b7208103f..7067bcebb1 100644
--- a/lxd/operations/operations.go
+++ b/lxd/operations/operations.go
@@ -186,7 +186,10 @@ func OperationCreate(s *state.State, project string, opClass operationClass, opT
logger.Debugf("New %s Operation: %s", op.class.String(), op.id)
_, md, _ := op.Render()
+
+ operationsLock.Lock()
op.sendEvent(md)
+ operationsLock.Unlock()
return &op, nil
}
@@ -255,9 +258,12 @@ func (op *Operation) Run() (chan error, error) {
chanRun <- err
logger.Debugf("Failure for %s operation: %s: %s", op.class.String(), op.id, err)
-
_, md, _ := op.Render()
+
+ op.lock.Lock()
op.sendEvent(md)
+ op.lock.Unlock()
+
return
}
@@ -267,18 +273,24 @@ func (op *Operation) Run() (chan error, error) {
op.done()
chanRun <- nil
- op.lock.Lock()
logger.Debugf("Success for %s operation: %s", op.class.String(), op.id)
_, md, _ := op.Render()
+
+ op.lock.Lock()
op.sendEvent(md)
op.lock.Unlock()
}(op, chanRun)
}
+
op.lock.Unlock()
logger.Debugf("Started %s operation: %s", op.class.String(), op.id)
_, md, _ := op.Render()
+
+ op.lock.Lock()
op.sendEvent(md)
+ op.lock.Unlock()
+
return chanRun, nil
}
@@ -313,7 +325,11 @@ func (op *Operation) Cancel() (chan error, error) {
logger.Debugf("Failed to cancel %s Operation: %s: %s", op.class.String(), op.id, err)
_, md, _ := op.Render()
+
+ op.lock.Lock()
op.sendEvent(md)
+ op.lock.Unlock()
+
return
}
@@ -325,7 +341,11 @@ func (op *Operation) Cancel() (chan error, error) {
logger.Debugf("Cancelled %s Operation: %s", op.class.String(), op.id)
_, md, _ := op.Render()
+
+ op.lock.Lock()
op.sendEvent(md)
+ op.lock.Unlock()
+
}(op, oldStatus, chanCancel)
}
@@ -350,7 +370,10 @@ func (op *Operation) Cancel() (chan error, error) {
logger.Debugf("Cancelled %s Operation: %s", op.class.String(), op.id)
_, md, _ = op.Render()
+
+ op.lock.Lock()
op.sendEvent(md)
+ op.lock.Unlock()
return chanCancel, nil
}
@@ -429,7 +452,8 @@ func (op *Operation) Render() (string, *api.Operation, error) {
return "", nil, err
}
- return op.url, &api.Operation{
+ op.lock.Lock()
+ retOp := &api.Operation{
ID: op.id,
Class: op.class.String(),
Description: op.description,
@@ -442,7 +466,10 @@ func (op *Operation) Render() (string, *api.Operation, error) {
MayCancel: op.mayCancel(),
Err: op.err,
Location: serverName,
- }, nil
+ }
+ op.lock.Unlock()
+
+ return op.url, retOp, nil
}
// WaitFinal waits for the operation to be done. If timeout is -1, it will wait
@@ -495,7 +522,10 @@ func (op *Operation) UpdateResources(opResources map[string][]string) error {
logger.Debugf("Updated resources for %s Operation: %s", op.class.String(), op.id)
_, md, _ := op.Render()
+
+ op.lock.Lock()
op.sendEvent(md)
+ op.lock.Unlock()
return nil
}
@@ -523,7 +553,10 @@ func (op *Operation) UpdateMetadata(opMetadata interface{}) error {
logger.Debugf("Updated metadata for %s Operation: %s", op.class.String(), op.id)
_, md, _ := op.Render()
+
+ op.lock.Lock()
op.sendEvent(md)
+ op.lock.Unlock()
return nil
}
More information about the lxc-devel
mailing list