converger: Rewrite the converger module
I found a deadlock in the converger code, and I realized the code was sufficiently bad that it needed a good clean up.
This commit is contained in:
@@ -29,135 +29,248 @@ import (
|
|||||||
multierr "github.com/hashicorp/go-multierror"
|
multierr "github.com/hashicorp/go-multierror"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO: we could make a new function that masks out the state of certain
|
// New builds a new converger coordinator.
|
||||||
// UID's, but at the moment the new Timer code has obsoleted the need...
|
func New(timeout int64) *Coordinator {
|
||||||
|
return &Coordinator{
|
||||||
// Converger is the general interface for implementing a convergence watcher.
|
|
||||||
type Converger interface { // TODO: need a better name
|
|
||||||
Register() UID
|
|
||||||
IsConverged(UID) bool // is the UID converged ?
|
|
||||||
SetConverged(UID, bool) error // set the converged state of the UID
|
|
||||||
Unregister(UID)
|
|
||||||
Start()
|
|
||||||
Pause()
|
|
||||||
Loop(bool)
|
|
||||||
ConvergedTimer(UID) <-chan time.Time
|
|
||||||
Status() map[uint64]bool
|
|
||||||
Timeout() int // returns the timeout that this was created with
|
|
||||||
AddStateFn(string, func(bool) error) error // adds a stateFn with a name
|
|
||||||
RemoveStateFn(string) error // remove a stateFn with a given name
|
|
||||||
}
|
|
||||||
|
|
||||||
// UID is the interface resources can use to notify with if converged. You'll
|
|
||||||
// need to use part of the Converger interface to Register initially too.
|
|
||||||
type UID interface {
|
|
||||||
ID() uint64 // get Id
|
|
||||||
Name() string // get a friendly name
|
|
||||||
SetName(string)
|
|
||||||
IsValid() bool // has Id been initialized ?
|
|
||||||
InvalidateID() // set Id to nil
|
|
||||||
IsConverged() bool
|
|
||||||
SetConverged(bool) error
|
|
||||||
Unregister()
|
|
||||||
ConvergedTimer() <-chan time.Time
|
|
||||||
StartTimer() (func() error, error) // cancellable is the same as StopTimer()
|
|
||||||
ResetTimer() error // resets counter to zero
|
|
||||||
StopTimer() error
|
|
||||||
}
|
|
||||||
|
|
||||||
// converger is an implementation of the Converger interface.
|
|
||||||
type converger struct {
|
|
||||||
timeout int // must be zero (instant) or greater seconds to run
|
|
||||||
converged bool // did we converge (state changes of this run Fn)
|
|
||||||
channel chan struct{} // signal here to run an isConverged check
|
|
||||||
control chan bool // control channel for start/pause
|
|
||||||
mutex *sync.RWMutex // used for controlling access to status and lastid
|
|
||||||
lastid uint64
|
|
||||||
status map[uint64]bool
|
|
||||||
stateFns map[string]func(bool) error // run on converged state changes with state bool
|
|
||||||
smutex *sync.RWMutex // used for controlling access to stateFns
|
|
||||||
}
|
|
||||||
|
|
||||||
// cuid is an implementation of the UID interface.
|
|
||||||
type cuid struct {
|
|
||||||
converger Converger
|
|
||||||
id uint64
|
|
||||||
name string // user defined, friendly name
|
|
||||||
mutex *sync.Mutex
|
|
||||||
timer chan struct{}
|
|
||||||
running bool // is the above timer running?
|
|
||||||
wg *sync.WaitGroup
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewConverger builds a new converger struct.
|
|
||||||
func NewConverger(timeout int) Converger {
|
|
||||||
return &converger{
|
|
||||||
timeout: timeout,
|
timeout: timeout,
|
||||||
channel: make(chan struct{}),
|
|
||||||
control: make(chan bool),
|
|
||||||
mutex: &sync.RWMutex{},
|
mutex: &sync.RWMutex{},
|
||||||
lastid: 0,
|
|
||||||
status: make(map[uint64]bool),
|
//lastid: 0,
|
||||||
|
status: make(map[*UID]struct{}),
|
||||||
|
|
||||||
|
//converged: false, // initial state
|
||||||
|
|
||||||
|
pokeChan: make(chan struct{}, 1), // must be buffered
|
||||||
|
|
||||||
|
readyChan: make(chan struct{}), // ready signal
|
||||||
|
|
||||||
|
//paused: false, // starts off as started
|
||||||
|
pauseSignal: make(chan struct{}),
|
||||||
|
//resumeSignal: make(chan struct{}), // happens on pause
|
||||||
|
//pausedAck: util.NewEasyAck(), // happens on pause
|
||||||
|
|
||||||
stateFns: make(map[string]func(bool) error),
|
stateFns: make(map[string]func(bool) error),
|
||||||
smutex: &sync.RWMutex{},
|
smutex: &sync.RWMutex{},
|
||||||
|
|
||||||
|
closeChan: make(chan struct{}),
|
||||||
|
wg: &sync.WaitGroup{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Register assigns a UID to the caller.
|
// Coordinator is the central converger engine.
|
||||||
func (obj *converger) Register() UID {
|
type Coordinator struct {
|
||||||
|
// timeout must be zero (instant) or greater seconds to run. If it's -1
|
||||||
|
// then this is disabled, and we never run stateFns.
|
||||||
|
timeout int64
|
||||||
|
|
||||||
|
// mutex is used for controlling access to status and lastid.
|
||||||
|
mutex *sync.RWMutex
|
||||||
|
|
||||||
|
// lastid contains the last uid we used for registration.
|
||||||
|
//lastid uint64
|
||||||
|
// status contains a reference to each active UID.
|
||||||
|
status map[*UID]struct{}
|
||||||
|
|
||||||
|
// converged stores the last convergence state. When this changes, we
|
||||||
|
// run the stateFns.
|
||||||
|
converged bool
|
||||||
|
|
||||||
|
// pokeChan receives a message every time we might need to re-calculate.
|
||||||
|
pokeChan chan struct{}
|
||||||
|
|
||||||
|
// readyChan closes to notify any interested parties that the main loop
|
||||||
|
// is running.
|
||||||
|
readyChan chan struct{}
|
||||||
|
|
||||||
|
// paused represents if this coordinator is paused or not.
|
||||||
|
paused bool
|
||||||
|
// pauseSignal closes to request a pause of this coordinator.
|
||||||
|
pauseSignal chan struct{}
|
||||||
|
// resumeSignal closes to request a resume of this coordinator.
|
||||||
|
resumeSignal chan struct{}
|
||||||
|
// pausedAck is used to send an ack message saying that we've paused.
|
||||||
|
pausedAck *util.EasyAck
|
||||||
|
|
||||||
|
// stateFns run on converged state changes.
|
||||||
|
stateFns map[string]func(bool) error
|
||||||
|
// smutex is used for controlling access to the stateFns map.
|
||||||
|
smutex *sync.RWMutex
|
||||||
|
|
||||||
|
// closeChan closes when we've been requested to shutdown.
|
||||||
|
closeChan chan struct{}
|
||||||
|
// wg waits for everything to finish.
|
||||||
|
wg *sync.WaitGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register creates a new UID which can be used to report converged state. You
|
||||||
|
// must Unregister each UID before Shutdown will be able to finish running.
|
||||||
|
func (obj *Coordinator) Register() *UID {
|
||||||
|
obj.wg.Add(1) // additional tracking for each UID
|
||||||
obj.mutex.Lock()
|
obj.mutex.Lock()
|
||||||
defer obj.mutex.Unlock()
|
defer obj.mutex.Unlock()
|
||||||
obj.lastid++
|
//obj.lastid++
|
||||||
obj.status[obj.lastid] = false // initialize as not converged
|
uid := &UID{
|
||||||
return &cuid{
|
timeout: obj.timeout, // copy the timeout here
|
||||||
converger: obj,
|
//id: obj.lastid,
|
||||||
id: obj.lastid,
|
//name: fmt.Sprintf("%d", obj.lastid), // some default
|
||||||
name: fmt.Sprintf("%d", obj.lastid), // some default
|
|
||||||
|
poke: obj.poke,
|
||||||
|
|
||||||
|
// timer
|
||||||
mutex: &sync.Mutex{},
|
mutex: &sync.Mutex{},
|
||||||
timer: nil,
|
timer: nil,
|
||||||
running: false,
|
running: false,
|
||||||
wg: &sync.WaitGroup{},
|
wg: &sync.WaitGroup{},
|
||||||
}
|
}
|
||||||
|
uid.unregister = func() { obj.Unregister(uid) } // add unregister func
|
||||||
|
obj.status[uid] = struct{}{} // TODO: add converged state here?
|
||||||
|
return uid
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsConverged gets the converged status of a uid.
|
// Unregister removes the UID from the converger coordinator. If you supply an
|
||||||
func (obj *converger) IsConverged(uid UID) bool {
|
// invalid or unregistered uid to this function, it will panic. An unregistered
|
||||||
if !uid.IsValid() {
|
// UID is no longer part of the convergence checking.
|
||||||
panic(fmt.Sprintf("the ID of UID(%s) is nil", uid.Name()))
|
func (obj *Coordinator) Unregister(uid *UID) {
|
||||||
}
|
defer obj.wg.Done() // additional tracking for each UID
|
||||||
obj.mutex.RLock()
|
|
||||||
isConverged, found := obj.status[uid.ID()] // lookup
|
|
||||||
obj.mutex.RUnlock()
|
|
||||||
if !found {
|
|
||||||
panic("the ID of UID is unregistered")
|
|
||||||
}
|
|
||||||
return isConverged
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetConverged updates the converger with the converged state of the UID.
|
|
||||||
func (obj *converger) SetConverged(uid UID, isConverged bool) error {
|
|
||||||
if !uid.IsValid() {
|
|
||||||
return fmt.Errorf("the ID of UID(%s) is nil", uid.Name())
|
|
||||||
}
|
|
||||||
obj.mutex.Lock()
|
obj.mutex.Lock()
|
||||||
if _, found := obj.status[uid.ID()]; !found {
|
defer obj.mutex.Unlock()
|
||||||
panic("the ID of UID is unregistered")
|
|
||||||
|
if _, exists := obj.status[uid]; !exists {
|
||||||
|
panic("uid is not registered")
|
||||||
}
|
}
|
||||||
obj.status[uid.ID()] = isConverged // set
|
uid.StopTimer() // ignore any errors
|
||||||
obj.mutex.Unlock() // unlock *before* poke or deadlock!
|
delete(obj.status, uid)
|
||||||
if isConverged != obj.converged { // only poke if it would be helpful
|
}
|
||||||
// run in a go routine so that we never block... just queue up!
|
|
||||||
// this allows us to send events, even if we haven't started...
|
// Run starts the main loop for the converger coordinator. It is commonly run
|
||||||
go func() { obj.channel <- struct{}{} }()
|
// from a go routine. It blocks until the Shutdown method is run to close it.
|
||||||
|
// NOTE: when we have very short timeouts, if we start before all the resources
|
||||||
|
// have joined the map, then it might appear as if we converged before we did!
|
||||||
|
func (obj *Coordinator) Run(startPaused bool) {
|
||||||
|
obj.wg.Add(1)
|
||||||
|
wg := &sync.WaitGroup{} // needed for the startPaused
|
||||||
|
defer wg.Wait() // don't leave any leftover go routines running
|
||||||
|
if startPaused {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
obj.Pause() // ignore any errors
|
||||||
|
close(obj.readyChan)
|
||||||
|
}()
|
||||||
|
} else {
|
||||||
|
close(obj.readyChan) // we must wait till the wg.Add(1) has happened...
|
||||||
}
|
}
|
||||||
|
defer obj.wg.Done()
|
||||||
|
for {
|
||||||
|
// pause if one was requested...
|
||||||
|
select {
|
||||||
|
case <-obj.pauseSignal: // channel closes
|
||||||
|
obj.pausedAck.Ack() // send ack
|
||||||
|
// we are paused now, and waiting for resume or exit...
|
||||||
|
select {
|
||||||
|
case <-obj.resumeSignal: // channel closes
|
||||||
|
// resumed!
|
||||||
|
|
||||||
|
case <-obj.closeChan: // we can always escape
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
case _, ok := <-obj.pokeChan: // we got an event (re-calculate)
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := obj.test(); err != nil {
|
||||||
|
// FIXME: what to do on error ?
|
||||||
|
}
|
||||||
|
|
||||||
|
case <-obj.closeChan: // we can always escape
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ready blocks until the Run loop has started up. This is useful so that we
|
||||||
|
// don't run Shutdown before we've even started up properly.
|
||||||
|
func (obj *Coordinator) Ready() {
|
||||||
|
select {
|
||||||
|
case <-obj.readyChan:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shutdown sends a signal to the Run loop that it should exit. This blocks
|
||||||
|
// until it does.
|
||||||
|
func (obj *Coordinator) Shutdown() {
|
||||||
|
close(obj.closeChan)
|
||||||
|
obj.wg.Wait()
|
||||||
|
close(obj.pokeChan) // free memory?
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pause pauses the coordinator. It should not be called on an already paused
|
||||||
|
// coordinator. It will block until the coordinator pauses with an
|
||||||
|
// acknowledgment, or until an exit is requested. If the latter happens it will
|
||||||
|
// error. It is NOT thread-safe with the Resume() method so only call either one
|
||||||
|
// at a time.
|
||||||
|
func (obj *Coordinator) Pause() error {
|
||||||
|
if obj.paused {
|
||||||
|
return fmt.Errorf("already paused")
|
||||||
|
}
|
||||||
|
|
||||||
|
obj.pausedAck = util.NewEasyAck()
|
||||||
|
obj.resumeSignal = make(chan struct{}) // build the resume signal
|
||||||
|
close(obj.pauseSignal)
|
||||||
|
|
||||||
|
// wait for ack (or exit signal)
|
||||||
|
select {
|
||||||
|
case <-obj.pausedAck.Wait(): // we got it!
|
||||||
|
// we're paused
|
||||||
|
case <-obj.closeChan:
|
||||||
|
return fmt.Errorf("closing")
|
||||||
|
}
|
||||||
|
obj.paused = true
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// isConverged returns true if *every* registered uid has converged.
|
// Resume unpauses the coordinator. It can be safely called on a brand-new
|
||||||
func (obj *converger) isConverged() bool {
|
// coordinator that has just started running without incident. It is NOT
|
||||||
obj.mutex.RLock() // take a read lock
|
// thread-safe with the Pause() method, so only call either one at a time.
|
||||||
defer obj.mutex.RUnlock()
|
func (obj *Coordinator) Resume() {
|
||||||
for _, v := range obj.status {
|
// TODO: do we need a mutex around Resume?
|
||||||
|
if !obj.paused { // no need to unpause brand-new resources
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
obj.pauseSignal = make(chan struct{}) // rebuild for next pause
|
||||||
|
close(obj.resumeSignal)
|
||||||
|
obj.poke() // unblock and notice the resume if necessary
|
||||||
|
|
||||||
|
obj.paused = false
|
||||||
|
|
||||||
|
// no need to wait for it to resume
|
||||||
|
//return // implied
|
||||||
|
}
|
||||||
|
|
||||||
|
// poke sends a message to the coordinator telling it that it should re-evaluate
|
||||||
|
// whether we're converged or not. This does not block. Do not run this in a
|
||||||
|
// goroutine. It must not be called after Shutdown has been called.
|
||||||
|
func (obj *Coordinator) poke() {
|
||||||
|
// redundant
|
||||||
|
//if len(obj.pokeChan) > 0 {
|
||||||
|
// return
|
||||||
|
//}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case obj.pokeChan <- struct{}{}:
|
||||||
|
default: // if chan is now full because more than one poke happened...
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsConverged returns true if *every* registered uid has converged. If there
|
||||||
|
// are no registered UID's, then this will return true.
|
||||||
|
func (obj *Coordinator) IsConverged() bool {
|
||||||
|
for _, v := range obj.Status() {
|
||||||
if !v { // everyone must be converged for this to be true
|
if !v { // everyone must be converged for this to be true
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -165,145 +278,40 @@ func (obj *converger) isConverged() bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unregister dissociates the ConvergedUID from the converged checking.
|
// test evaluates whether we're converged or not and runs the state change. It
|
||||||
func (obj *converger) Unregister(uid UID) {
|
// is NOT thread-safe.
|
||||||
if !uid.IsValid() {
|
func (obj *Coordinator) test() error {
|
||||||
panic(fmt.Sprintf("the ID of UID(%s) is nil", uid.Name()))
|
// TODO: add these checks elsewhere to prevent anything from running?
|
||||||
}
|
if obj.timeout < 0 {
|
||||||
obj.mutex.Lock()
|
return nil // nothing to do (only run if timeout is valid)
|
||||||
uid.StopTimer() // ignore any errors
|
|
||||||
delete(obj.status, uid.ID())
|
|
||||||
obj.mutex.Unlock()
|
|
||||||
uid.InvalidateID()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start causes a Converger object to start or resume running.
|
|
||||||
func (obj *converger) Start() {
|
|
||||||
obj.control <- true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pause causes a Converger object to stop running temporarily.
|
|
||||||
func (obj *converger) Pause() { // FIXME: add a sync ACK on pause before return
|
|
||||||
obj.control <- false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Loop is the main loop for a Converger object. It usually runs in a goroutine.
|
|
||||||
// TODO: we could eventually have each resource tell us as soon as it converges,
|
|
||||||
// and then keep track of the time delays here, to avoid callers needing select.
|
|
||||||
// NOTE: when we have very short timeouts, if we start before all the resources
|
|
||||||
// have joined the map, then it might appear as if we converged before we did!
|
|
||||||
func (obj *converger) Loop(startPaused bool) {
|
|
||||||
if obj.control == nil {
|
|
||||||
panic("converger not initialized correctly")
|
|
||||||
}
|
|
||||||
if startPaused { // start paused without racing
|
|
||||||
select {
|
|
||||||
case e := <-obj.control:
|
|
||||||
if !e {
|
|
||||||
panic("converger expected true")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case e := <-obj.control: // expecting "false" which means pause!
|
|
||||||
if e {
|
|
||||||
panic("converger expected false")
|
|
||||||
}
|
|
||||||
// now i'm paused...
|
|
||||||
select {
|
|
||||||
case e := <-obj.control:
|
|
||||||
if !e {
|
|
||||||
panic("converger expected true")
|
|
||||||
}
|
|
||||||
// restart
|
|
||||||
// kick once to refresh the check...
|
|
||||||
go func() { obj.channel <- struct{}{} }()
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
case <-obj.channel:
|
converged := obj.IsConverged()
|
||||||
if !obj.isConverged() {
|
defer func() {
|
||||||
if obj.converged { // we're doing a state change
|
obj.converged = converged // set this only at the end...
|
||||||
|
}()
|
||||||
|
|
||||||
|
if !converged {
|
||||||
|
if !obj.converged { // were we previously also not converged?
|
||||||
|
return nil // nothing to do
|
||||||
|
}
|
||||||
|
|
||||||
|
// we're doing a state change
|
||||||
// call the arbitrary functions (takes a read lock!)
|
// call the arbitrary functions (takes a read lock!)
|
||||||
if err := obj.runStateFns(false); err != nil {
|
return obj.runStateFns(false)
|
||||||
// FIXME: what to do on error ?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
obj.converged = false
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// we have converged!
|
// we have converged!
|
||||||
if obj.timeout >= 0 { // only run if timeout is valid
|
if obj.converged { // were we previously also converged?
|
||||||
if !obj.converged { // we're doing a state change
|
return nil // nothing to do
|
||||||
|
}
|
||||||
|
|
||||||
// call the arbitrary functions (takes a read lock!)
|
// call the arbitrary functions (takes a read lock!)
|
||||||
if err := obj.runStateFns(true); err != nil {
|
return obj.runStateFns(true)
|
||||||
// FIXME: what to do on error ?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
obj.converged = true
|
|
||||||
// loop and wait again...
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConvergedTimer adds a timeout to a select call and blocks until then.
|
// runStateFns runs the list of stored state functions.
|
||||||
// TODO: this means we could eventually have per resource converged timeouts
|
func (obj *Coordinator) runStateFns(converged bool) error {
|
||||||
func (obj *converger) ConvergedTimer(uid UID) <-chan time.Time {
|
|
||||||
// be clever: if i'm already converged, this timeout should block which
|
|
||||||
// avoids unnecessary new signals being sent! this avoids fast loops if
|
|
||||||
// we have a low timeout, or in particular a timeout == 0
|
|
||||||
if uid.IsConverged() {
|
|
||||||
// blocks the case statement in select forever!
|
|
||||||
return util.TimeAfterOrBlock(-1)
|
|
||||||
}
|
|
||||||
return util.TimeAfterOrBlock(obj.timeout)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Status returns a map of the converged status of each UID.
|
|
||||||
func (obj *converger) Status() map[uint64]bool {
|
|
||||||
status := make(map[uint64]bool)
|
|
||||||
obj.mutex.RLock() // take a read lock
|
|
||||||
defer obj.mutex.RUnlock()
|
|
||||||
for k, v := range obj.status { // make a copy to avoid the mutex
|
|
||||||
status[k] = v
|
|
||||||
}
|
|
||||||
return status
|
|
||||||
}
|
|
||||||
|
|
||||||
// Timeout returns the timeout in seconds that converger was created with. This
|
|
||||||
// is useful to avoid passing in the timeout value separately when you're
|
|
||||||
// already passing in the Converger struct.
|
|
||||||
func (obj *converger) Timeout() int {
|
|
||||||
return obj.timeout
|
|
||||||
}
|
|
||||||
|
|
||||||
// AddStateFn adds a state function to be run on change of converged state.
|
|
||||||
func (obj *converger) AddStateFn(name string, stateFn func(bool) error) error {
|
|
||||||
obj.smutex.Lock()
|
|
||||||
defer obj.smutex.Unlock()
|
|
||||||
if _, exists := obj.stateFns[name]; exists {
|
|
||||||
return fmt.Errorf("a stateFn with that name already exists")
|
|
||||||
}
|
|
||||||
obj.stateFns[name] = stateFn
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// RemoveStateFn adds a state function to be run on change of converged state.
|
|
||||||
func (obj *converger) RemoveStateFn(name string) error {
|
|
||||||
obj.smutex.Lock()
|
|
||||||
defer obj.smutex.Unlock()
|
|
||||||
if _, exists := obj.stateFns[name]; !exists {
|
|
||||||
return fmt.Errorf("a stateFn with that name doesn't exist")
|
|
||||||
}
|
|
||||||
delete(obj.stateFns, name)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// runStateFns runs the listed of stored state functions.
|
|
||||||
func (obj *converger) runStateFns(converged bool) error {
|
|
||||||
obj.smutex.RLock()
|
obj.smutex.RLock()
|
||||||
defer obj.smutex.RUnlock()
|
defer obj.smutex.RUnlock()
|
||||||
var keys []string
|
var keys []string
|
||||||
@@ -322,70 +330,119 @@ func (obj *converger) runStateFns(converged bool) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// ID returns the unique id of this UID object.
|
// AddStateFn adds a state function to be run on change of converged state.
|
||||||
func (obj *cuid) ID() uint64 {
|
func (obj *Coordinator) AddStateFn(name string, stateFn func(bool) error) error {
|
||||||
return obj.id
|
obj.smutex.Lock()
|
||||||
|
defer obj.smutex.Unlock()
|
||||||
|
if _, exists := obj.stateFns[name]; exists {
|
||||||
|
return fmt.Errorf("a stateFn with that name already exists")
|
||||||
|
}
|
||||||
|
obj.stateFns[name] = stateFn
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Name returns a user defined name for the specific cuid.
|
// RemoveStateFn removes a state function from running on change of converged
|
||||||
func (obj *cuid) Name() string {
|
// state.
|
||||||
return obj.name
|
func (obj *Coordinator) RemoveStateFn(name string) error {
|
||||||
|
obj.smutex.Lock()
|
||||||
|
defer obj.smutex.Unlock()
|
||||||
|
if _, exists := obj.stateFns[name]; !exists {
|
||||||
|
return fmt.Errorf("a stateFn with that name doesn't exist")
|
||||||
|
}
|
||||||
|
delete(obj.stateFns, name)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetName sets a user defined name for the specific cuid.
|
// Status returns a map of the converged status of each UID.
|
||||||
func (obj *cuid) SetName(name string) {
|
func (obj *Coordinator) Status() map[*UID]bool {
|
||||||
obj.name = name
|
status := make(map[*UID]bool)
|
||||||
|
obj.mutex.RLock() // take a read lock
|
||||||
|
defer obj.mutex.RUnlock()
|
||||||
|
for k := range obj.status {
|
||||||
|
status[k] = k.IsConverged()
|
||||||
|
}
|
||||||
|
return status
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsValid tells us if the id is valid or has already been destroyed.
|
// Timeout returns the timeout in seconds that converger was created with. This
|
||||||
func (obj *cuid) IsValid() bool {
|
// is useful to avoid passing in the timeout value separately when you're
|
||||||
return obj.id != 0 // an id of 0 is invalid
|
// already passing in the Coordinator struct.
|
||||||
|
func (obj *Coordinator) Timeout() int64 {
|
||||||
|
return obj.timeout
|
||||||
}
|
}
|
||||||
|
|
||||||
// InvalidateID marks the id as no longer valid.
|
// UID represents one of the probes for the converger coordinator. It is created
|
||||||
func (obj *cuid) InvalidateID() {
|
// by calling the Register method of the Coordinator struct. It should be freed
|
||||||
obj.id = 0 // an id of 0 is invalid
|
// after use with Unregister.
|
||||||
|
type UID struct {
|
||||||
|
// timeout is a copy of the main timeout. It could eventually be used
|
||||||
|
// for per-UID timeouts too.
|
||||||
|
timeout int64
|
||||||
|
// isConverged stores the convergence state of this particular UID.
|
||||||
|
isConverged bool
|
||||||
|
|
||||||
|
// poke stores a reference to the main poke function.
|
||||||
|
poke func()
|
||||||
|
// unregister stores a reference to the unregister function.
|
||||||
|
unregister func()
|
||||||
|
|
||||||
|
// timer
|
||||||
|
mutex *sync.Mutex
|
||||||
|
timer chan struct{}
|
||||||
|
running bool // is the timer running?
|
||||||
|
wg *sync.WaitGroup
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsConverged is a helper function to the regular IsConverged method.
|
// Unregister removes this UID from the converger coordinator. An unregistered
|
||||||
func (obj *cuid) IsConverged() bool {
|
// UID is no longer part of the convergence checking.
|
||||||
return obj.converger.IsConverged(obj)
|
func (obj *UID) Unregister() {
|
||||||
|
obj.unregister()
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetConverged is a helper function to the regular SetConverged notification.
|
// IsConverged reports whether this UID is converged or not.
|
||||||
func (obj *cuid) SetConverged(isConverged bool) error {
|
func (obj *UID) IsConverged() bool {
|
||||||
return obj.converger.SetConverged(obj, isConverged)
|
return obj.isConverged
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unregister is a helper function to unregister myself.
|
// SetConverged sets the convergence state of this UID. This is used by the
|
||||||
func (obj *cuid) Unregister() {
|
// running timer if one is started. The timer will overwrite any value set by
|
||||||
obj.converger.Unregister(obj)
|
// this method.
|
||||||
|
func (obj *UID) SetConverged(isConverged bool) {
|
||||||
|
obj.isConverged = isConverged
|
||||||
|
obj.poke() // notify of change
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConvergedTimer is a helper around the regular ConvergedTimer method.
|
// ConvergedTimer adds a timeout to a select call and blocks until then.
|
||||||
func (obj *cuid) ConvergedTimer() <-chan time.Time {
|
// TODO: this means we could eventually have per resource converged timeouts
|
||||||
return obj.converger.ConvergedTimer(obj)
|
func (obj *UID) ConvergedTimer() <-chan time.Time {
|
||||||
|
// be clever: if i'm already converged, this timeout should block which
|
||||||
|
// avoids unnecessary new signals being sent! this avoids fast loops if
|
||||||
|
// we have a low timeout, or in particular a timeout == 0
|
||||||
|
if obj.IsConverged() {
|
||||||
|
// blocks the case statement in select forever!
|
||||||
|
return util.TimeAfterOrBlock(-1)
|
||||||
|
}
|
||||||
|
return util.TimeAfterOrBlock(int(obj.timeout))
|
||||||
}
|
}
|
||||||
|
|
||||||
// StartTimer runs an invisible timer that automatically converges on timeout.
|
// StartTimer runs a timer that sets us as converged on timeout. It also returns
|
||||||
func (obj *cuid) StartTimer() (func() error, error) {
|
// a handle to the StopTimer function which should be run before exit.
|
||||||
|
func (obj *UID) StartTimer() (func() error, error) {
|
||||||
obj.mutex.Lock()
|
obj.mutex.Lock()
|
||||||
if !obj.running {
|
defer obj.mutex.Unlock()
|
||||||
obj.timer = make(chan struct{})
|
if obj.running {
|
||||||
obj.running = true
|
|
||||||
} else {
|
|
||||||
obj.mutex.Unlock()
|
|
||||||
return obj.StopTimer, fmt.Errorf("timer already started")
|
return obj.StopTimer, fmt.Errorf("timer already started")
|
||||||
}
|
}
|
||||||
obj.mutex.Unlock()
|
obj.timer = make(chan struct{})
|
||||||
|
obj.running = true
|
||||||
obj.wg.Add(1)
|
obj.wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer obj.wg.Done()
|
defer obj.wg.Done()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case _, ok := <-obj.timer: // reset signal channel
|
case _, ok := <-obj.timer: // reset signal channel
|
||||||
if !ok { // channel is closed
|
if !ok {
|
||||||
return // false to exit
|
return
|
||||||
}
|
}
|
||||||
obj.SetConverged(false)
|
obj.SetConverged(false)
|
||||||
|
|
||||||
@@ -393,8 +450,8 @@ func (obj *cuid) StartTimer() (func() error, error) {
|
|||||||
obj.SetConverged(true) // converged!
|
obj.SetConverged(true) // converged!
|
||||||
select {
|
select {
|
||||||
case _, ok := <-obj.timer: // reset signal channel
|
case _, ok := <-obj.timer: // reset signal channel
|
||||||
if !ok { // channel is closed
|
if !ok {
|
||||||
return // false to exit
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -403,8 +460,8 @@ func (obj *cuid) StartTimer() (func() error, error) {
|
|||||||
return obj.StopTimer, nil
|
return obj.StopTimer, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ResetTimer resets the counter to zero if using a StartTimer internally.
|
// ResetTimer resets the timer to zero.
|
||||||
func (obj *cuid) ResetTimer() error {
|
func (obj *UID) ResetTimer() error {
|
||||||
obj.mutex.Lock()
|
obj.mutex.Lock()
|
||||||
defer obj.mutex.Unlock()
|
defer obj.mutex.Unlock()
|
||||||
if obj.running {
|
if obj.running {
|
||||||
@@ -414,8 +471,8 @@ func (obj *cuid) ResetTimer() error {
|
|||||||
return fmt.Errorf("timer hasn't been started")
|
return fmt.Errorf("timer hasn't been started")
|
||||||
}
|
}
|
||||||
|
|
||||||
// StopTimer stops the running timer permanently until a StartTimer is run.
|
// StopTimer stops the running timer.
|
||||||
func (obj *cuid) StopTimer() error {
|
func (obj *UID) StopTimer() error {
|
||||||
obj.mutex.Lock()
|
obj.mutex.Lock()
|
||||||
defer obj.mutex.Unlock()
|
defer obj.mutex.Unlock()
|
||||||
if !obj.running {
|
if !obj.running {
|
||||||
|
|||||||
31
converger/converger_test.go
Normal file
31
converger/converger_test.go
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
// Mgmt
|
||||||
|
// Copyright (C) 2013-2018+ James Shubin and the project contributors
|
||||||
|
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// +build !root
|
||||||
|
|
||||||
|
package converger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBufferedChan1(t *testing.T) {
|
||||||
|
ch := make(chan bool, 1)
|
||||||
|
ch <- true
|
||||||
|
close(ch) // closing a channel that's not empty should not block
|
||||||
|
// must be able to exit without blocking anywhere
|
||||||
|
}
|
||||||
@@ -42,7 +42,7 @@ type Engine struct {
|
|||||||
// Prefix is a unique directory prefix which can be used. It should be
|
// Prefix is a unique directory prefix which can be used. It should be
|
||||||
// created if needed.
|
// created if needed.
|
||||||
Prefix string
|
Prefix string
|
||||||
Converger converger.Converger
|
Converger *converger.Coordinator
|
||||||
|
|
||||||
Debug bool
|
Debug bool
|
||||||
Logf func(format string, v ...interface{})
|
Logf func(format string, v ...interface{})
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ type State struct {
|
|||||||
// created if needed.
|
// created if needed.
|
||||||
Prefix string
|
Prefix string
|
||||||
|
|
||||||
//Converger converger.Converger
|
//Converger *converger.Coordinator
|
||||||
|
|
||||||
// Debug turns on additional output and behaviours.
|
// Debug turns on additional output and behaviours.
|
||||||
Debug bool
|
Debug bool
|
||||||
@@ -85,8 +85,8 @@ type State struct {
|
|||||||
starter bool // do we have an indegree of 0 ?
|
starter bool // do we have an indegree of 0 ?
|
||||||
working bool // is the Main() loop running ?
|
working bool // is the Main() loop running ?
|
||||||
|
|
||||||
cuid converger.UID // primary converger
|
cuid *converger.UID // primary converger
|
||||||
tuid converger.UID // secondary converger
|
tuid *converger.UID // secondary converger
|
||||||
|
|
||||||
init *engine.Init // a copy of the init struct passed to res Init
|
init *engine.Init // a copy of the init struct passed to res Init
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -211,7 +211,7 @@ type EmbdEtcd struct { // EMBeddeD etcd
|
|||||||
|
|
||||||
flags Flags
|
flags Flags
|
||||||
prefix string // folder prefix to use for misc storage
|
prefix string // folder prefix to use for misc storage
|
||||||
converger converger.Converger // converged tracking
|
converger *converger.Coordinator // converged tracking
|
||||||
|
|
||||||
// etcd server related
|
// etcd server related
|
||||||
serverwg sync.WaitGroup // wait for server to shutdown
|
serverwg sync.WaitGroup // wait for server to shutdown
|
||||||
@@ -221,7 +221,7 @@ type EmbdEtcd struct { // EMBeddeD etcd
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewEmbdEtcd creates the top level embedded etcd struct client and server obj.
|
// NewEmbdEtcd creates the top level embedded etcd struct client and server obj.
|
||||||
func NewEmbdEtcd(hostname string, seeds, clientURLs, serverURLs, advertiseClientURLs, advertiseServerURLs etcdtypes.URLs, noServer bool, noNetwork bool, idealClusterSize uint16, flags Flags, prefix string, converger converger.Converger) *EmbdEtcd {
|
func NewEmbdEtcd(hostname string, seeds, clientURLs, serverURLs, advertiseClientURLs, advertiseServerURLs etcdtypes.URLs, noServer bool, noNetwork bool, idealClusterSize uint16, flags Flags, prefix string, converger *converger.Coordinator) *EmbdEtcd {
|
||||||
endpoints := make(etcdtypes.URLsMap)
|
endpoints := make(etcdtypes.URLsMap)
|
||||||
if hostname == seedSentinel { // safety
|
if hostname == seedSentinel { // safety
|
||||||
return nil
|
return nil
|
||||||
@@ -764,7 +764,6 @@ func (obj *EmbdEtcd) CbLoop() {
|
|||||||
obj.exitwg.Add(1)
|
obj.exitwg.Add(1)
|
||||||
defer obj.exitwg.Done()
|
defer obj.exitwg.Done()
|
||||||
cuid := obj.converger.Register()
|
cuid := obj.converger.Register()
|
||||||
cuid.SetName("Etcd: CbLoop")
|
|
||||||
defer cuid.Unregister()
|
defer cuid.Unregister()
|
||||||
if e := obj.Connect(false); e != nil {
|
if e := obj.Connect(false); e != nil {
|
||||||
return // fatal
|
return // fatal
|
||||||
@@ -833,7 +832,6 @@ func (obj *EmbdEtcd) Loop() {
|
|||||||
obj.exitwg.Add(1) // TODO: add these to other go routines?
|
obj.exitwg.Add(1) // TODO: add these to other go routines?
|
||||||
defer obj.exitwg.Done()
|
defer obj.exitwg.Done()
|
||||||
cuid := obj.converger.Register()
|
cuid := obj.converger.Register()
|
||||||
cuid.SetName("Etcd: Loop")
|
|
||||||
defer cuid.Unregister()
|
defer cuid.Unregister()
|
||||||
if e := obj.Connect(false); e != nil {
|
if e := obj.Connect(false); e != nil {
|
||||||
return // fatal
|
return // fatal
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ func CLI(program, version string, flags Flags) error {
|
|||||||
Value: "",
|
Value: "",
|
||||||
Usage: "graphviz filter to use",
|
Usage: "graphviz filter to use",
|
||||||
},
|
},
|
||||||
cli.IntFlag{
|
cli.Int64Flag{
|
||||||
Name: "converged-timeout, t",
|
Name: "converged-timeout, t",
|
||||||
Value: -1,
|
Value: -1,
|
||||||
Usage: "after approximately this many seconds without activity, we're considered to be in a converged state",
|
Usage: "after approximately this many seconds without activity, we're considered to be in a converged state",
|
||||||
|
|||||||
10
lib/main.go
10
lib/main.go
@@ -77,7 +77,7 @@ type Main struct {
|
|||||||
Sema int // add a semaphore with this lock count to each resource
|
Sema int // add a semaphore with this lock count to each resource
|
||||||
Graphviz string // output file for graphviz data
|
Graphviz string // output file for graphviz data
|
||||||
GraphvizFilter string // graphviz filter to use
|
GraphvizFilter string // graphviz filter to use
|
||||||
ConvergedTimeout int // approximately this many seconds of inactivity means we're in a converged state; -1 to disable
|
ConvergedTimeout int64 // approximately this many seconds of inactivity means we're in a converged state; -1 to disable
|
||||||
ConvergedTimeoutNoExit bool // don't exit on converged timeout
|
ConvergedTimeoutNoExit bool // don't exit on converged timeout
|
||||||
ConvergedStatusFile string // file to append converged status to
|
ConvergedStatusFile string // file to append converged status to
|
||||||
MaxRuntime uint // exit after a maximum of approximately this many seconds
|
MaxRuntime uint // exit after a maximum of approximately this many seconds
|
||||||
@@ -313,7 +313,7 @@ func (obj *Main) Run() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// setup converger
|
// setup converger
|
||||||
converger := converger.NewConverger(
|
converger := converger.New(
|
||||||
obj.ConvergedTimeout,
|
obj.ConvergedTimeout,
|
||||||
)
|
)
|
||||||
if obj.ConvergedStatusFile != "" {
|
if obj.ConvergedStatusFile != "" {
|
||||||
@@ -334,10 +334,12 @@ func (obj *Main) Run() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// XXX: should this be moved to later in the code?
|
// XXX: should this be moved to later in the code?
|
||||||
go converger.Loop(true) // main loop for converger, true to start paused
|
go converger.Run(true) // main loop for converger, true to start paused
|
||||||
|
converger.Ready() // block until ready
|
||||||
obj.cleanup = append(obj.cleanup, func() error {
|
obj.cleanup = append(obj.cleanup, func() error {
|
||||||
// TODO: shutdown converger, but make sure that using it in a
|
// TODO: shutdown converger, but make sure that using it in a
|
||||||
// still running embdEtcd struct doesn't block waiting on it...
|
// still running embdEtcd struct doesn't block waiting on it...
|
||||||
|
converger.Shutdown()
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -649,7 +651,7 @@ func (obj *Main) Run() error {
|
|||||||
Logf("error starting graph: %+v", err)
|
Logf("error starting graph: %+v", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
converger.Start() // after Start()
|
converger.Resume() // after Start()
|
||||||
started = true
|
started = true
|
||||||
|
|
||||||
Logf("graph: %+v", obj.ge.Graph()) // show graph
|
Logf("graph: %+v", obj.ge.Graph()) // show graph
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ func run(c *cli.Context, name string, gapiObj gapi.GAPI) error {
|
|||||||
obj.Sema = cliContext.Int("sema")
|
obj.Sema = cliContext.Int("sema")
|
||||||
obj.Graphviz = cliContext.String("graphviz")
|
obj.Graphviz = cliContext.String("graphviz")
|
||||||
obj.GraphvizFilter = cliContext.String("graphviz-filter")
|
obj.GraphvizFilter = cliContext.String("graphviz-filter")
|
||||||
obj.ConvergedTimeout = cliContext.Int("converged-timeout")
|
obj.ConvergedTimeout = cliContext.Int64("converged-timeout")
|
||||||
obj.ConvergedTimeoutNoExit = cliContext.Bool("converged-timeout-no-exit")
|
obj.ConvergedTimeoutNoExit = cliContext.Bool("converged-timeout-no-exit")
|
||||||
obj.ConvergedStatusFile = cliContext.String("converged-status-file")
|
obj.ConvergedStatusFile = cliContext.String("converged-status-file")
|
||||||
obj.MaxRuntime = uint(cliContext.Int("max-runtime"))
|
obj.MaxRuntime = uint(cliContext.Int("max-runtime"))
|
||||||
|
|||||||
Reference in New Issue
Block a user