engine: graph: Don't deadlock on error

This simplifies the pause mechanism and also avoids a deadlock on error.
If the Worker shuts down completely, but before we've been removed from
the graph, then an attempted pause would deadlock if we didn't have an
escape hatch here.

This removes the unnecessary ack mechanism now that we have a
synchronous channel send to represent the pausing, rather than an
asynchronous channel closing.
This commit is contained in:
James Shubin
2023-09-01 16:50:54 -04:00
parent 2773a621a2
commit 0b1b0a3f80
2 changed files with 3 additions and 15 deletions

View File

@@ -412,7 +412,7 @@ Loop:
// being made safe with a sync.Once, we instead run a // being made safe with a sync.Once, we instead run a
// close() call inside of the vertexRemoveFn function, // close() call inside of the vertexRemoveFn function,
// which should unblock resumeSignal so we can shutdown. // which should unblock resumeSignal so we can shutdown.
obj.state[vertex].pausedAck.Ack() // send ack
// we are paused now, and waiting for resume or exit... // we are paused now, and waiting for resume or exit...
select { select {
case _, closed = <-obj.state[vertex].resumeSignal: // channel closes case _, closed = <-obj.state[vertex].resumeSignal: // channel closes

View File

@@ -26,7 +26,6 @@ import (
"github.com/purpleidea/mgmt/converger" "github.com/purpleidea/mgmt/converger"
"github.com/purpleidea/mgmt/engine" "github.com/purpleidea/mgmt/engine"
"github.com/purpleidea/mgmt/pgraph" "github.com/purpleidea/mgmt/pgraph"
"github.com/purpleidea/mgmt/util"
"github.com/purpleidea/mgmt/util/errwrap" "github.com/purpleidea/mgmt/util/errwrap"
) )
@@ -108,11 +107,6 @@ type State struct {
// resumeSignal receives a message to resume this resource. The channel // resumeSignal receives a message to resume this resource. The channel
// closes when the resource is removed from the graph. // closes when the resource is removed from the graph.
resumeSignal chan struct{} resumeSignal chan struct{}
// pausedAck is used to send an ack message saying that we've paused.
// This helps us know if the pause was actually received and when it was
// received. Otherwise we might not know if it errored or when it
// actually stopped being busy and go to the paused stage.
pausedAck *util.EasyAck
wg *sync.WaitGroup // used for all vertex specific processes wg *sync.WaitGroup // used for all vertex specific processes
@@ -156,7 +150,6 @@ func (obj *State) Init() error {
//obj.paused = false // starts off as started //obj.paused = false // starts off as started
obj.pauseSignal = make(chan struct{}) obj.pauseSignal = make(chan struct{})
obj.resumeSignal = make(chan struct{}) obj.resumeSignal = make(chan struct{})
//obj.pausedAck = util.NewEasyAck() // happens on pause
obj.wg = &sync.WaitGroup{} obj.wg = &sync.WaitGroup{}
@@ -333,17 +326,12 @@ func (obj *State) Pause() error {
panic("already paused") panic("already paused")
} }
obj.pausedAck = util.NewEasyAck()
select {
case obj.pauseSignal <- struct{}{}:
}
// wait for ack (or exit signal) // wait for ack (or exit signal)
select { select {
case <-obj.pausedAck.Wait(): // we got it! case obj.pauseSignal <- struct{}{}:
// we're paused // we're paused
case <-obj.doneCtx.Done(): // gc cleans up the obj.pausedAck case <-obj.doneCtx.Done():
return engine.ErrClosed return engine.ErrClosed
} }
obj.paused = true obj.paused = true