converger: Rewrite the converger module

I found a deadlock in the converger code, and I realized the code was
sufficiently bad that it needed a good clean up.
This commit is contained in:
James Shubin
2019-02-21 18:35:14 -05:00
parent 450d5c1a59
commit 4860d833c7
8 changed files with 386 additions and 298 deletions

View File

@@ -29,135 +29,248 @@ import (
multierr "github.com/hashicorp/go-multierror" multierr "github.com/hashicorp/go-multierror"
) )
// TODO: we could make a new function that masks out the state of certain // New builds a new converger coordinator.
// UID's, but at the moment the new Timer code has obsoleted the need... func New(timeout int64) *Coordinator {
return &Coordinator{
// Converger is the general interface for implementing a convergence watcher.
type Converger interface { // TODO: need a better name
Register() UID
IsConverged(UID) bool // is the UID converged ?
SetConverged(UID, bool) error // set the converged state of the UID
Unregister(UID)
Start()
Pause()
Loop(bool)
ConvergedTimer(UID) <-chan time.Time
Status() map[uint64]bool
Timeout() int // returns the timeout that this was created with
AddStateFn(string, func(bool) error) error // adds a stateFn with a name
RemoveStateFn(string) error // remove a stateFn with a given name
}
// UID is the interface resources can use to notify with if converged. You'll
// need to use part of the Converger interface to Register initially too.
type UID interface {
ID() uint64 // get Id
Name() string // get a friendly name
SetName(string)
IsValid() bool // has Id been initialized ?
InvalidateID() // set Id to nil
IsConverged() bool
SetConverged(bool) error
Unregister()
ConvergedTimer() <-chan time.Time
StartTimer() (func() error, error) // cancellable is the same as StopTimer()
ResetTimer() error // resets counter to zero
StopTimer() error
}
// converger is an implementation of the Converger interface.
type converger struct {
timeout int // must be zero (instant) or greater seconds to run
converged bool // did we converge (state changes of this run Fn)
channel chan struct{} // signal here to run an isConverged check
control chan bool // control channel for start/pause
mutex *sync.RWMutex // used for controlling access to status and lastid
lastid uint64
status map[uint64]bool
stateFns map[string]func(bool) error // run on converged state changes with state bool
smutex *sync.RWMutex // used for controlling access to stateFns
}
// cuid is an implementation of the UID interface.
type cuid struct {
converger Converger
id uint64
name string // user defined, friendly name
mutex *sync.Mutex
timer chan struct{}
running bool // is the above timer running?
wg *sync.WaitGroup
}
// NewConverger builds a new converger struct.
func NewConverger(timeout int) Converger {
return &converger{
timeout: timeout, timeout: timeout,
channel: make(chan struct{}),
control: make(chan bool),
mutex: &sync.RWMutex{}, mutex: &sync.RWMutex{},
lastid: 0,
status: make(map[uint64]bool), //lastid: 0,
status: make(map[*UID]struct{}),
//converged: false, // initial state
pokeChan: make(chan struct{}, 1), // must be buffered
readyChan: make(chan struct{}), // ready signal
//paused: false, // starts off as started
pauseSignal: make(chan struct{}),
//resumeSignal: make(chan struct{}), // happens on pause
//pausedAck: util.NewEasyAck(), // happens on pause
stateFns: make(map[string]func(bool) error), stateFns: make(map[string]func(bool) error),
smutex: &sync.RWMutex{}, smutex: &sync.RWMutex{},
closeChan: make(chan struct{}),
wg: &sync.WaitGroup{},
} }
} }
// Register assigns a UID to the caller. // Coordinator is the central converger engine.
func (obj *converger) Register() UID { type Coordinator struct {
// timeout must be zero (instant) or greater seconds to run. If it's -1
// then this is disabled, and we never run stateFns.
timeout int64
// mutex is used for controlling access to status and lastid.
mutex *sync.RWMutex
// lastid contains the last uid we used for registration.
//lastid uint64
// status contains a reference to each active UID.
status map[*UID]struct{}
// converged stores the last convergence state. When this changes, we
// run the stateFns.
converged bool
// pokeChan receives a message every time we might need to re-calculate.
pokeChan chan struct{}
// readyChan closes to notify any interested parties that the main loop
// is running.
readyChan chan struct{}
// paused represents if this coordinator is paused or not.
paused bool
// pauseSignal closes to request a pause of this coordinator.
pauseSignal chan struct{}
// resumeSignal closes to request a resume of this coordinator.
resumeSignal chan struct{}
// pausedAck is used to send an ack message saying that we've paused.
pausedAck *util.EasyAck
// stateFns run on converged state changes.
stateFns map[string]func(bool) error
// smutex is used for controlling access to the stateFns map.
smutex *sync.RWMutex
// closeChan closes when we've been requested to shutdown.
closeChan chan struct{}
// wg waits for everything to finish.
wg *sync.WaitGroup
}
// Register creates a new UID which can be used to report converged state. You
// must Unregister each UID before Shutdown will be able to finish running.
func (obj *Coordinator) Register() *UID {
obj.wg.Add(1) // additional tracking for each UID
obj.mutex.Lock() obj.mutex.Lock()
defer obj.mutex.Unlock() defer obj.mutex.Unlock()
obj.lastid++ //obj.lastid++
obj.status[obj.lastid] = false // initialize as not converged uid := &UID{
return &cuid{ timeout: obj.timeout, // copy the timeout here
converger: obj, //id: obj.lastid,
id: obj.lastid, //name: fmt.Sprintf("%d", obj.lastid), // some default
name: fmt.Sprintf("%d", obj.lastid), // some default
poke: obj.poke,
// timer
mutex: &sync.Mutex{}, mutex: &sync.Mutex{},
timer: nil, timer: nil,
running: false, running: false,
wg: &sync.WaitGroup{}, wg: &sync.WaitGroup{},
} }
uid.unregister = func() { obj.Unregister(uid) } // add unregister func
obj.status[uid] = struct{}{} // TODO: add converged state here?
return uid
} }
// IsConverged gets the converged status of a uid. // Unregister removes the UID from the converger coordinator. If you supply an
func (obj *converger) IsConverged(uid UID) bool { // invalid or unregistered uid to this function, it will panic. An unregistered
if !uid.IsValid() { // UID is no longer part of the convergence checking.
panic(fmt.Sprintf("the ID of UID(%s) is nil", uid.Name())) func (obj *Coordinator) Unregister(uid *UID) {
} defer obj.wg.Done() // additional tracking for each UID
obj.mutex.RLock()
isConverged, found := obj.status[uid.ID()] // lookup
obj.mutex.RUnlock()
if !found {
panic("the ID of UID is unregistered")
}
return isConverged
}
// SetConverged updates the converger with the converged state of the UID.
func (obj *converger) SetConverged(uid UID, isConverged bool) error {
if !uid.IsValid() {
return fmt.Errorf("the ID of UID(%s) is nil", uid.Name())
}
obj.mutex.Lock() obj.mutex.Lock()
if _, found := obj.status[uid.ID()]; !found { defer obj.mutex.Unlock()
panic("the ID of UID is unregistered")
if _, exists := obj.status[uid]; !exists {
panic("uid is not registered")
} }
obj.status[uid.ID()] = isConverged // set uid.StopTimer() // ignore any errors
obj.mutex.Unlock() // unlock *before* poke or deadlock! delete(obj.status, uid)
if isConverged != obj.converged { // only poke if it would be helpful }
// run in a go routine so that we never block... just queue up!
// this allows us to send events, even if we haven't started... // Run starts the main loop for the converger coordinator. It is commonly run
go func() { obj.channel <- struct{}{} }() // from a go routine. It blocks until the Shutdown method is run to close it.
// NOTE: when we have very short timeouts, if we start before all the resources
// have joined the map, then it might appear as if we converged before we did!
func (obj *Coordinator) Run(startPaused bool) {
obj.wg.Add(1)
wg := &sync.WaitGroup{} // needed for the startPaused
defer wg.Wait() // don't leave any leftover go routines running
if startPaused {
wg.Add(1)
go func() {
defer wg.Done()
obj.Pause() // ignore any errors
close(obj.readyChan)
}()
} else {
close(obj.readyChan) // we must wait till the wg.Add(1) has happened...
} }
defer obj.wg.Done()
for {
// pause if one was requested...
select {
case <-obj.pauseSignal: // channel closes
obj.pausedAck.Ack() // send ack
// we are paused now, and waiting for resume or exit...
select {
case <-obj.resumeSignal: // channel closes
// resumed!
case <-obj.closeChan: // we can always escape
return
}
case _, ok := <-obj.pokeChan: // we got an event (re-calculate)
if !ok {
return
}
if err := obj.test(); err != nil {
// FIXME: what to do on error ?
}
case <-obj.closeChan: // we can always escape
return
}
}
}
// Ready blocks until the Run loop has started up. This is useful so that we
// don't run Shutdown before we've even started up properly.
func (obj *Coordinator) Ready() {
select {
case <-obj.readyChan:
}
}
// Shutdown sends a signal to the Run loop that it should exit. This blocks
// until it does.
func (obj *Coordinator) Shutdown() {
close(obj.closeChan)
obj.wg.Wait()
close(obj.pokeChan) // free memory?
}
// Pause pauses the coordinator. It should not be called on an already paused
// coordinator. It will block until the coordinator pauses with an
// acknowledgment, or until an exit is requested. If the latter happens it will
// error. It is NOT thread-safe with the Resume() method so only call either one
// at a time.
func (obj *Coordinator) Pause() error {
if obj.paused {
return fmt.Errorf("already paused")
}
obj.pausedAck = util.NewEasyAck()
obj.resumeSignal = make(chan struct{}) // build the resume signal
close(obj.pauseSignal)
// wait for ack (or exit signal)
select {
case <-obj.pausedAck.Wait(): // we got it!
// we're paused
case <-obj.closeChan:
return fmt.Errorf("closing")
}
obj.paused = true
return nil return nil
} }
// isConverged returns true if *every* registered uid has converged. // Resume unpauses the coordinator. It can be safely called on a brand-new
func (obj *converger) isConverged() bool { // coordinator that has just started running without incident. It is NOT
obj.mutex.RLock() // take a read lock // thread-safe with the Pause() method, so only call either one at a time.
defer obj.mutex.RUnlock() func (obj *Coordinator) Resume() {
for _, v := range obj.status { // TODO: do we need a mutex around Resume?
if !obj.paused { // no need to unpause brand-new resources
return
}
obj.pauseSignal = make(chan struct{}) // rebuild for next pause
close(obj.resumeSignal)
obj.poke() // unblock and notice the resume if necessary
obj.paused = false
// no need to wait for it to resume
//return // implied
}
// poke sends a message to the coordinator telling it that it should re-evaluate
// whether we're converged or not. This does not block. Do not run this in a
// goroutine. It must not be called after Shutdown has been called.
func (obj *Coordinator) poke() {
// redundant
//if len(obj.pokeChan) > 0 {
// return
//}
select {
case obj.pokeChan <- struct{}{}:
default: // if chan is now full because more than one poke happened...
}
}
// IsConverged returns true if *every* registered uid has converged. If there
// are no registered UID's, then this will return true.
func (obj *Coordinator) IsConverged() bool {
for _, v := range obj.Status() {
if !v { // everyone must be converged for this to be true if !v { // everyone must be converged for this to be true
return false return false
} }
@@ -165,145 +278,40 @@ func (obj *converger) isConverged() bool {
return true return true
} }
// Unregister dissociates the ConvergedUID from the converged checking. // test evaluates whether we're converged or not and runs the state change. It
func (obj *converger) Unregister(uid UID) { // is NOT thread-safe.
if !uid.IsValid() { func (obj *Coordinator) test() error {
panic(fmt.Sprintf("the ID of UID(%s) is nil", uid.Name())) // TODO: add these checks elsewhere to prevent anything from running?
} if obj.timeout < 0 {
obj.mutex.Lock() return nil // nothing to do (only run if timeout is valid)
uid.StopTimer() // ignore any errors
delete(obj.status, uid.ID())
obj.mutex.Unlock()
uid.InvalidateID()
}
// Start causes a Converger object to start or resume running.
func (obj *converger) Start() {
obj.control <- true
}
// Pause causes a Converger object to stop running temporarily.
func (obj *converger) Pause() { // FIXME: add a sync ACK on pause before return
obj.control <- false
}
// Loop is the main loop for a Converger object. It usually runs in a goroutine.
// TODO: we could eventually have each resource tell us as soon as it converges,
// and then keep track of the time delays here, to avoid callers needing select.
// NOTE: when we have very short timeouts, if we start before all the resources
// have joined the map, then it might appear as if we converged before we did!
func (obj *converger) Loop(startPaused bool) {
if obj.control == nil {
panic("converger not initialized correctly")
}
if startPaused { // start paused without racing
select {
case e := <-obj.control:
if !e {
panic("converger expected true")
}
}
}
for {
select {
case e := <-obj.control: // expecting "false" which means pause!
if e {
panic("converger expected false")
}
// now i'm paused...
select {
case e := <-obj.control:
if !e {
panic("converger expected true")
}
// restart
// kick once to refresh the check...
go func() { obj.channel <- struct{}{} }()
continue
} }
case <-obj.channel: converged := obj.IsConverged()
if !obj.isConverged() { defer func() {
if obj.converged { // we're doing a state change obj.converged = converged // set this only at the end...
}()
if !converged {
if !obj.converged { // were we previously also not converged?
return nil // nothing to do
}
// we're doing a state change
// call the arbitrary functions (takes a read lock!) // call the arbitrary functions (takes a read lock!)
if err := obj.runStateFns(false); err != nil { return obj.runStateFns(false)
// FIXME: what to do on error ?
}
}
obj.converged = false
continue
} }
// we have converged! // we have converged!
if obj.timeout >= 0 { // only run if timeout is valid if obj.converged { // were we previously also converged?
if !obj.converged { // we're doing a state change return nil // nothing to do
}
// call the arbitrary functions (takes a read lock!) // call the arbitrary functions (takes a read lock!)
if err := obj.runStateFns(true); err != nil { return obj.runStateFns(true)
// FIXME: what to do on error ?
}
}
}
obj.converged = true
// loop and wait again...
}
}
} }
// ConvergedTimer adds a timeout to a select call and blocks until then. // runStateFns runs the list of stored state functions.
// TODO: this means we could eventually have per resource converged timeouts func (obj *Coordinator) runStateFns(converged bool) error {
func (obj *converger) ConvergedTimer(uid UID) <-chan time.Time {
// be clever: if i'm already converged, this timeout should block which
// avoids unnecessary new signals being sent! this avoids fast loops if
// we have a low timeout, or in particular a timeout == 0
if uid.IsConverged() {
// blocks the case statement in select forever!
return util.TimeAfterOrBlock(-1)
}
return util.TimeAfterOrBlock(obj.timeout)
}
// Status returns a map of the converged status of each UID.
func (obj *converger) Status() map[uint64]bool {
status := make(map[uint64]bool)
obj.mutex.RLock() // take a read lock
defer obj.mutex.RUnlock()
for k, v := range obj.status { // make a copy to avoid the mutex
status[k] = v
}
return status
}
// Timeout returns the timeout in seconds that converger was created with. This
// is useful to avoid passing in the timeout value separately when you're
// already passing in the Converger struct.
func (obj *converger) Timeout() int {
return obj.timeout
}
// AddStateFn adds a state function to be run on change of converged state.
func (obj *converger) AddStateFn(name string, stateFn func(bool) error) error {
obj.smutex.Lock()
defer obj.smutex.Unlock()
if _, exists := obj.stateFns[name]; exists {
return fmt.Errorf("a stateFn with that name already exists")
}
obj.stateFns[name] = stateFn
return nil
}
// RemoveStateFn adds a state function to be run on change of converged state.
func (obj *converger) RemoveStateFn(name string) error {
obj.smutex.Lock()
defer obj.smutex.Unlock()
if _, exists := obj.stateFns[name]; !exists {
return fmt.Errorf("a stateFn with that name doesn't exist")
}
delete(obj.stateFns, name)
return nil
}
// runStateFns runs the listed of stored state functions.
func (obj *converger) runStateFns(converged bool) error {
obj.smutex.RLock() obj.smutex.RLock()
defer obj.smutex.RUnlock() defer obj.smutex.RUnlock()
var keys []string var keys []string
@@ -322,70 +330,119 @@ func (obj *converger) runStateFns(converged bool) error {
return err return err
} }
// ID returns the unique id of this UID object. // AddStateFn adds a state function to be run on change of converged state.
func (obj *cuid) ID() uint64 { func (obj *Coordinator) AddStateFn(name string, stateFn func(bool) error) error {
return obj.id obj.smutex.Lock()
defer obj.smutex.Unlock()
if _, exists := obj.stateFns[name]; exists {
return fmt.Errorf("a stateFn with that name already exists")
}
obj.stateFns[name] = stateFn
return nil
} }
// Name returns a user defined name for the specific cuid. // RemoveStateFn removes a state function from running on change of converged
func (obj *cuid) Name() string { // state.
return obj.name func (obj *Coordinator) RemoveStateFn(name string) error {
obj.smutex.Lock()
defer obj.smutex.Unlock()
if _, exists := obj.stateFns[name]; !exists {
return fmt.Errorf("a stateFn with that name doesn't exist")
}
delete(obj.stateFns, name)
return nil
} }
// SetName sets a user defined name for the specific cuid. // Status returns a map of the converged status of each UID.
func (obj *cuid) SetName(name string) { func (obj *Coordinator) Status() map[*UID]bool {
obj.name = name status := make(map[*UID]bool)
obj.mutex.RLock() // take a read lock
defer obj.mutex.RUnlock()
for k := range obj.status {
status[k] = k.IsConverged()
}
return status
} }
// IsValid tells us if the id is valid or has already been destroyed. // Timeout returns the timeout in seconds that converger was created with. This
func (obj *cuid) IsValid() bool { // is useful to avoid passing in the timeout value separately when you're
return obj.id != 0 // an id of 0 is invalid // already passing in the Coordinator struct.
func (obj *Coordinator) Timeout() int64 {
return obj.timeout
} }
// InvalidateID marks the id as no longer valid. // UID represents one of the probes for the converger coordinator. It is created
func (obj *cuid) InvalidateID() { // by calling the Register method of the Coordinator struct. It should be freed
obj.id = 0 // an id of 0 is invalid // after use with Unregister.
type UID struct {
// timeout is a copy of the main timeout. It could eventually be used
// for per-UID timeouts too.
timeout int64
// isConverged stores the convergence state of this particular UID.
isConverged bool
// poke stores a reference to the main poke function.
poke func()
// unregister stores a reference to the unregister function.
unregister func()
// timer
mutex *sync.Mutex
timer chan struct{}
running bool // is the timer running?
wg *sync.WaitGroup
} }
// IsConverged is a helper function to the regular IsConverged method. // Unregister removes this UID from the converger coordinator. An unregistered
func (obj *cuid) IsConverged() bool { // UID is no longer part of the convergence checking.
return obj.converger.IsConverged(obj) func (obj *UID) Unregister() {
obj.unregister()
} }
// SetConverged is a helper function to the regular SetConverged notification. // IsConverged reports whether this UID is converged or not.
func (obj *cuid) SetConverged(isConverged bool) error { func (obj *UID) IsConverged() bool {
return obj.converger.SetConverged(obj, isConverged) return obj.isConverged
} }
// Unregister is a helper function to unregister myself. // SetConverged sets the convergence state of this UID. This is used by the
func (obj *cuid) Unregister() { // running timer if one is started. The timer will overwrite any value set by
obj.converger.Unregister(obj) // this method.
func (obj *UID) SetConverged(isConverged bool) {
obj.isConverged = isConverged
obj.poke() // notify of change
} }
// ConvergedTimer is a helper around the regular ConvergedTimer method. // ConvergedTimer adds a timeout to a select call and blocks until then.
func (obj *cuid) ConvergedTimer() <-chan time.Time { // TODO: this means we could eventually have per resource converged timeouts
return obj.converger.ConvergedTimer(obj) func (obj *UID) ConvergedTimer() <-chan time.Time {
// be clever: if i'm already converged, this timeout should block which
// avoids unnecessary new signals being sent! this avoids fast loops if
// we have a low timeout, or in particular a timeout == 0
if obj.IsConverged() {
// blocks the case statement in select forever!
return util.TimeAfterOrBlock(-1)
}
return util.TimeAfterOrBlock(int(obj.timeout))
} }
// StartTimer runs an invisible timer that automatically converges on timeout. // StartTimer runs a timer that sets us as converged on timeout. It also returns
func (obj *cuid) StartTimer() (func() error, error) { // a handle to the StopTimer function which should be run before exit.
func (obj *UID) StartTimer() (func() error, error) {
obj.mutex.Lock() obj.mutex.Lock()
if !obj.running { defer obj.mutex.Unlock()
obj.timer = make(chan struct{}) if obj.running {
obj.running = true
} else {
obj.mutex.Unlock()
return obj.StopTimer, fmt.Errorf("timer already started") return obj.StopTimer, fmt.Errorf("timer already started")
} }
obj.mutex.Unlock() obj.timer = make(chan struct{})
obj.running = true
obj.wg.Add(1) obj.wg.Add(1)
go func() { go func() {
defer obj.wg.Done() defer obj.wg.Done()
for { for {
select { select {
case _, ok := <-obj.timer: // reset signal channel case _, ok := <-obj.timer: // reset signal channel
if !ok { // channel is closed if !ok {
return // false to exit return
} }
obj.SetConverged(false) obj.SetConverged(false)
@@ -393,8 +450,8 @@ func (obj *cuid) StartTimer() (func() error, error) {
obj.SetConverged(true) // converged! obj.SetConverged(true) // converged!
select { select {
case _, ok := <-obj.timer: // reset signal channel case _, ok := <-obj.timer: // reset signal channel
if !ok { // channel is closed if !ok {
return // false to exit return
} }
} }
} }
@@ -403,8 +460,8 @@ func (obj *cuid) StartTimer() (func() error, error) {
return obj.StopTimer, nil return obj.StopTimer, nil
} }
// ResetTimer resets the counter to zero if using a StartTimer internally. // ResetTimer resets the timer to zero.
func (obj *cuid) ResetTimer() error { func (obj *UID) ResetTimer() error {
obj.mutex.Lock() obj.mutex.Lock()
defer obj.mutex.Unlock() defer obj.mutex.Unlock()
if obj.running { if obj.running {
@@ -414,8 +471,8 @@ func (obj *cuid) ResetTimer() error {
return fmt.Errorf("timer hasn't been started") return fmt.Errorf("timer hasn't been started")
} }
// StopTimer stops the running timer permanently until a StartTimer is run. // StopTimer stops the running timer.
func (obj *cuid) StopTimer() error { func (obj *UID) StopTimer() error {
obj.mutex.Lock() obj.mutex.Lock()
defer obj.mutex.Unlock() defer obj.mutex.Unlock()
if !obj.running { if !obj.running {

View File

@@ -0,0 +1,31 @@
// Mgmt
// Copyright (C) 2013-2018+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// +build !root
package converger
import (
"testing"
)
func TestBufferedChan1(t *testing.T) {
ch := make(chan bool, 1)
ch <- true
close(ch) // closing a channel that's not empty should not block
// must be able to exit without blocking anywhere
}

View File

@@ -42,7 +42,7 @@ type Engine struct {
// Prefix is a unique directory prefix which can be used. It should be // Prefix is a unique directory prefix which can be used. It should be
// created if needed. // created if needed.
Prefix string Prefix string
Converger converger.Converger Converger *converger.Coordinator
Debug bool Debug bool
Logf func(format string, v ...interface{}) Logf func(format string, v ...interface{})

View File

@@ -51,7 +51,7 @@ type State struct {
// created if needed. // created if needed.
Prefix string Prefix string
//Converger converger.Converger //Converger *converger.Coordinator
// Debug turns on additional output and behaviours. // Debug turns on additional output and behaviours.
Debug bool Debug bool
@@ -85,8 +85,8 @@ type State struct {
starter bool // do we have an indegree of 0 ? starter bool // do we have an indegree of 0 ?
working bool // is the Main() loop running ? working bool // is the Main() loop running ?
cuid converger.UID // primary converger cuid *converger.UID // primary converger
tuid converger.UID // secondary converger tuid *converger.UID // secondary converger
init *engine.Init // a copy of the init struct passed to res Init init *engine.Init // a copy of the init struct passed to res Init
} }

View File

@@ -211,7 +211,7 @@ type EmbdEtcd struct { // EMBeddeD etcd
flags Flags flags Flags
prefix string // folder prefix to use for misc storage prefix string // folder prefix to use for misc storage
converger converger.Converger // converged tracking converger *converger.Coordinator // converged tracking
// etcd server related // etcd server related
serverwg sync.WaitGroup // wait for server to shutdown serverwg sync.WaitGroup // wait for server to shutdown
@@ -221,7 +221,7 @@ type EmbdEtcd struct { // EMBeddeD etcd
} }
// NewEmbdEtcd creates the top level embedded etcd struct client and server obj. // NewEmbdEtcd creates the top level embedded etcd struct client and server obj.
func NewEmbdEtcd(hostname string, seeds, clientURLs, serverURLs, advertiseClientURLs, advertiseServerURLs etcdtypes.URLs, noServer bool, noNetwork bool, idealClusterSize uint16, flags Flags, prefix string, converger converger.Converger) *EmbdEtcd { func NewEmbdEtcd(hostname string, seeds, clientURLs, serverURLs, advertiseClientURLs, advertiseServerURLs etcdtypes.URLs, noServer bool, noNetwork bool, idealClusterSize uint16, flags Flags, prefix string, converger *converger.Coordinator) *EmbdEtcd {
endpoints := make(etcdtypes.URLsMap) endpoints := make(etcdtypes.URLsMap)
if hostname == seedSentinel { // safety if hostname == seedSentinel { // safety
return nil return nil
@@ -764,7 +764,6 @@ func (obj *EmbdEtcd) CbLoop() {
obj.exitwg.Add(1) obj.exitwg.Add(1)
defer obj.exitwg.Done() defer obj.exitwg.Done()
cuid := obj.converger.Register() cuid := obj.converger.Register()
cuid.SetName("Etcd: CbLoop")
defer cuid.Unregister() defer cuid.Unregister()
if e := obj.Connect(false); e != nil { if e := obj.Connect(false); e != nil {
return // fatal return // fatal
@@ -833,7 +832,6 @@ func (obj *EmbdEtcd) Loop() {
obj.exitwg.Add(1) // TODO: add these to other go routines? obj.exitwg.Add(1) // TODO: add these to other go routines?
defer obj.exitwg.Done() defer obj.exitwg.Done()
cuid := obj.converger.Register() cuid := obj.converger.Register()
cuid.SetName("Etcd: Loop")
defer cuid.Unregister() defer cuid.Unregister()
if e := obj.Connect(false); e != nil { if e := obj.Connect(false); e != nil {
return // fatal return // fatal

View File

@@ -104,7 +104,7 @@ func CLI(program, version string, flags Flags) error {
Value: "", Value: "",
Usage: "graphviz filter to use", Usage: "graphviz filter to use",
}, },
cli.IntFlag{ cli.Int64Flag{
Name: "converged-timeout, t", Name: "converged-timeout, t",
Value: -1, Value: -1,
Usage: "after approximately this many seconds without activity, we're considered to be in a converged state", Usage: "after approximately this many seconds without activity, we're considered to be in a converged state",

View File

@@ -77,7 +77,7 @@ type Main struct {
Sema int // add a semaphore with this lock count to each resource Sema int // add a semaphore with this lock count to each resource
Graphviz string // output file for graphviz data Graphviz string // output file for graphviz data
GraphvizFilter string // graphviz filter to use GraphvizFilter string // graphviz filter to use
ConvergedTimeout int // approximately this many seconds of inactivity means we're in a converged state; -1 to disable ConvergedTimeout int64 // approximately this many seconds of inactivity means we're in a converged state; -1 to disable
ConvergedTimeoutNoExit bool // don't exit on converged timeout ConvergedTimeoutNoExit bool // don't exit on converged timeout
ConvergedStatusFile string // file to append converged status to ConvergedStatusFile string // file to append converged status to
MaxRuntime uint // exit after a maximum of approximately this many seconds MaxRuntime uint // exit after a maximum of approximately this many seconds
@@ -313,7 +313,7 @@ func (obj *Main) Run() error {
} }
// setup converger // setup converger
converger := converger.NewConverger( converger := converger.New(
obj.ConvergedTimeout, obj.ConvergedTimeout,
) )
if obj.ConvergedStatusFile != "" { if obj.ConvergedStatusFile != "" {
@@ -334,10 +334,12 @@ func (obj *Main) Run() error {
} }
// XXX: should this be moved to later in the code? // XXX: should this be moved to later in the code?
go converger.Loop(true) // main loop for converger, true to start paused go converger.Run(true) // main loop for converger, true to start paused
converger.Ready() // block until ready
obj.cleanup = append(obj.cleanup, func() error { obj.cleanup = append(obj.cleanup, func() error {
// TODO: shutdown converger, but make sure that using it in a // TODO: shutdown converger, but make sure that using it in a
// still running embdEtcd struct doesn't block waiting on it... // still running embdEtcd struct doesn't block waiting on it...
converger.Shutdown()
return nil return nil
}) })
@@ -649,7 +651,7 @@ func (obj *Main) Run() error {
Logf("error starting graph: %+v", err) Logf("error starting graph: %+v", err)
continue continue
} }
converger.Start() // after Start() converger.Resume() // after Start()
started = true started = true
Logf("graph: %+v", obj.ge.Graph()) // show graph Logf("graph: %+v", obj.ge.Graph()) // show graph

View File

@@ -95,7 +95,7 @@ func run(c *cli.Context, name string, gapiObj gapi.GAPI) error {
obj.Sema = cliContext.Int("sema") obj.Sema = cliContext.Int("sema")
obj.Graphviz = cliContext.String("graphviz") obj.Graphviz = cliContext.String("graphviz")
obj.GraphvizFilter = cliContext.String("graphviz-filter") obj.GraphvizFilter = cliContext.String("graphviz-filter")
obj.ConvergedTimeout = cliContext.Int("converged-timeout") obj.ConvergedTimeout = cliContext.Int64("converged-timeout")
obj.ConvergedTimeoutNoExit = cliContext.Bool("converged-timeout-no-exit") obj.ConvergedTimeoutNoExit = cliContext.Bool("converged-timeout-no-exit")
obj.ConvergedStatusFile = cliContext.String("converged-status-file") obj.ConvergedStatusFile = cliContext.String("converged-status-file")
obj.MaxRuntime = uint(cliContext.Int("max-runtime")) obj.MaxRuntime = uint(cliContext.Int("max-runtime"))