engine: Resources package rewrite

This giant patch makes some much needed improvements to the code base.

* The engine has been rewritten and lives within engine/graph/
* All of the common interfaces and code now live in engine/
* All of the resources are in one package called engine/resources/
* The Res API can use different "traits" from engine/traits/
* The Res API has been simplified to hide many of the old internals
* The Watch & Process loops were previously inverted, but is now fixed
* The likelihood of package cycles has been reduced drastically
* And much, much more...

Unfortunately, some code had to be temporarily removed. The remote code
had to be taken out, as did the prometheus code. We hope to have these
back in new forms as soon as possible.
This commit is contained in:
James Shubin
2018-03-13 12:02:44 -04:00
parent ef49aa7e08
commit 9969286224
140 changed files with 9130 additions and 8764 deletions

View File

@@ -27,12 +27,16 @@ import (
"time"
"github.com/purpleidea/mgmt/converger"
"github.com/purpleidea/mgmt/engine"
"github.com/purpleidea/mgmt/engine/graph"
"github.com/purpleidea/mgmt/engine/graph/autogroup"
_ "github.com/purpleidea/mgmt/engine/resources" // let register's run
"github.com/purpleidea/mgmt/etcd"
"github.com/purpleidea/mgmt/gapi"
"github.com/purpleidea/mgmt/gapi/empty"
"github.com/purpleidea/mgmt/pgp"
"github.com/purpleidea/mgmt/pgraph"
"github.com/purpleidea/mgmt/prometheus"
"github.com/purpleidea/mgmt/resources"
"github.com/purpleidea/mgmt/util"
etcdtypes "github.com/coreos/etcd/pkg/types"
@@ -61,7 +65,7 @@ type Main struct {
AllowTmpPrefix bool // allow creation of a new temporary prefix if main prefix is unavailable
Deploy *gapi.Deploy // deploy object including GAPI for static deploys
DeployFs resources.Fs // used for static deploys
DeployFs engine.Fs // used for static deploys
NoWatch bool // do not change graph under any circumstances
NoConfigWatch bool // do not update graph due to config changes
@@ -99,12 +103,14 @@ type Main struct {
Prometheus bool // enable prometheus metrics
PrometheusListen string // prometheus instance bind specification
exit chan error // exit signal
ge *graph.Engine
exit *util.EasyExit // exit signal
cleanup []func() error // list of functions to run on close
}
// Init initializes the main struct after it performs some validation.
func (obj *Main) Init() error {
// Validate validates the main structure without making any modifications to it.
func (obj *Main) Validate() error {
if obj.Program == "" || obj.Version == "" {
return fmt.Errorf("you must set the Program and Version strings")
}
@@ -113,6 +119,11 @@ func (obj *Main) Init() error {
return fmt.Errorf("choosing a prefix and the request for a tmp prefix is illogical")
}
return nil
}
// Init initializes the main struct after it performs some validation.
func (obj *Main) Init() error {
// if we've turned off watching, then be explicit and disable them all!
// if all the watches are disabled, then it's equivalent to no watching
if obj.NoWatch {
@@ -164,19 +175,21 @@ func (obj *Main) Init() error {
return errwrap.Wrapf(err, "the AdvertiseServerURLs didn't parse correctly")
}
obj.exit = make(chan error)
obj.exit = util.NewEasyExit()
obj.cleanup = []func() error{}
return nil
}
// Exit causes a safe shutdown. This is often attached to the ^C signal handler.
func (obj *Main) Exit(err error) {
obj.exit <- err // trigger an exit!
}
// Run is the main execution entrypoint to run mgmt.
func (obj *Main) Run() error {
Logf := func(format string, v ...interface{}) {
log.Printf("main: "+format, v...)
}
hello(obj.Program, obj.Version, obj.Flags) // say hello!
defer Logf("Goodbye!")
defer obj.exit.Done(nil) // ensure this gets called even if Exit doesn't
hostname, err := os.Hostname() // a sensible default
// allow passing in the hostname, instead of using the system setting
@@ -200,17 +213,13 @@ func (obj *Main) Run() error {
if prefix, err = ioutil.TempDir("", obj.Program+"-"+hostname+"-"); err != nil {
return fmt.Errorf("can't create temporary prefix")
}
log.Println("Main: Warning: Working prefix directory is temporary!")
Logf("warning: working prefix directory is temporary!")
} else {
return fmt.Errorf("can't create prefix")
}
}
log.Printf("Main: Working prefix is: %s", prefix)
pgraphPrefix := fmt.Sprintf("%s/", path.Join(prefix, "pgraph")) // pgraph namespace
if err := os.MkdirAll(pgraphPrefix, 0770); err != nil {
return errwrap.Wrapf(err, "can't create pgraph prefix")
}
Logf("working prefix is: %s", prefix)
var prom *prometheus.Prometheus
if obj.Prometheus {
@@ -218,17 +227,24 @@ func (obj *Main) Run() error {
Listen: obj.PrometheusListen,
}
if err := prom.Init(); err != nil {
return errwrap.Wrapf(err, "can't create initiate Prometheus instance")
return errwrap.Wrapf(err, "can't initialize prometheus instance")
}
log.Printf("Main: Prometheus: Starting instance on %s", prom.Listen)
Logf("prometheus: starting instance on: %s", prom.Listen)
if err := prom.Start(); err != nil {
return errwrap.Wrapf(err, "can't start initiate Prometheus instance")
return errwrap.Wrapf(err, "can't start prometheus instance")
}
if err := prom.InitKindMetrics(resources.RegisteredResourcesNames()); err != nil {
if err := prom.InitKindMetrics(engine.RegisteredResourcesNames()); err != nil {
return errwrap.Wrapf(err, "can't initialize kind-specific prometheus metrics")
}
obj.cleanup = append(obj.cleanup, func() error {
Logf("prometheus: stopping instance")
if err := prom.Stop(); err != nil {
return errwrap.Wrapf(err, "the prometheus instance exited poorly")
}
return nil
})
}
if !obj.NoPgp {
@@ -249,7 +265,6 @@ func (obj *Main) Run() error {
}
if obj.pgpKeys == nil {
identity := fmt.Sprintf("%s <%s> %s", obj.Program, "root@"+hostname, "generated by "+obj.Program)
if p := obj.PgpIdentity; p != nil {
identity = *p
@@ -274,36 +289,59 @@ func (obj *Main) Run() error {
// TODO: Import admin key
}
oldGraph := &pgraph.Graph{}
graph := &resources.MGraph{}
// pass in the information we need
graph.Debug = obj.Flags.Debug
graph.Init()
exitchan := make(chan struct{}) // exit on close
wg := &sync.WaitGroup{} // waitgroup for inner loop & goroutines
// exit after `max-runtime` seconds for no reason at all...
if i := obj.MaxRuntime; i > 0 {
wg.Add(1)
go func() {
time.Sleep(time.Duration(i) * time.Second)
obj.Exit(nil)
defer wg.Done()
select {
case <-time.After(time.Duration(i) * time.Second):
obj.exit.Done(fmt.Errorf("max runtime reached")) // trigger exit signal
case <-obj.exit.Signal(): // exit early on exit signal
return
}
}()
}
// setup converger
converger := converger.NewConverger(
obj.ConvergedTimeout,
nil, // stateFn gets added in by EmbdEtcd
)
if obj.ConvergedStatusFile != "" {
converger.AddStateFn("status-file", func(converged bool) error {
Logf("converged status is: %t", converged)
return appendConvergedStatus(obj.ConvergedStatusFile, converged)
})
}
if obj.ConvergedTimeout >= 0 && !obj.ConvergedTimeoutNoExit {
converger.AddStateFn("converged-exit", func(converged bool) error {
if converged {
Logf("converged for %d seconds, exiting!", obj.ConvergedTimeout)
obj.exit.Done(nil) // trigger an exit!
}
return nil
})
}
// XXX: should this be moved to later in the code?
go converger.Loop(true) // main loop for converger, true to start paused
// TODO: will this shut things down prematurely?
converger.Start() // better start this anyways...
obj.cleanup = append(obj.cleanup, func() error {
// TODO: shutdown converger, but make sure that using it in a
// still running embdEtcd struct doesn't block waiting on it...
return nil
})
// embedded etcd
if len(obj.seeds) == 0 {
log.Printf("Main: Seeds: No seeds specified!")
Logf("etcd: seeds: no seeds specified!")
} else {
log.Printf("Main: Seeds(%d): %v", len(obj.seeds), obj.seeds)
Logf("etcd: seeds(%d): %+v", len(obj.seeds), obj.seeds)
}
EmbdEtcd := etcd.NewEmbdEtcd(
embdEtcd := etcd.NewEmbdEtcd(
hostname,
obj.seeds,
obj.clientURLs,
@@ -320,62 +358,32 @@ func (obj *Main) Run() error {
prefix,
converger,
)
if EmbdEtcd == nil {
// TODO: verify EmbdEtcd is not nil below...
obj.Exit(fmt.Errorf("Main: Etcd: Creation failed"))
} else if err := EmbdEtcd.Startup(); err != nil { // startup (returns when etcd main loop is running)
obj.Exit(fmt.Errorf("Main: Etcd: Startup failed: %v", err))
if embdEtcd == nil {
return fmt.Errorf("etcd: creation failed")
} else if err := embdEtcd.Startup(); err != nil { // startup (returns when etcd main loop is running)
return errwrap.Wrapf(err, "etcd: startup failed")
}
obj.cleanup = append(obj.cleanup, func() error {
// cleanup etcd main loop last so it can process everything first
err := embdEtcd.Destroy() // shutdown and cleanup etcd
return errwrap.Wrapf(err, "etcd: exited poorly")
})
// wait for etcd server to be ready before continuing...
// XXX: this is wrong if we're not going to be a server! we'll block!!!
// select {
// case <-EmbdEtcd.ServerReady():
// log.Printf("Main: Etcd: Server: Ready!")
// case <-embdEtcd.ServerReady():
// Logf("etcd: server: ready!")
// // pass
// case <-time.After(((etcd.MaxStartServerTimeout * etcd.MaxStartServerRetries) + 1) * time.Second):
// obj.Exit(fmt.Errorf("Main: Etcd: Startup timeout"))
// return fmt.Errorf("etcd: startup timeout")
// }
time.Sleep(1 * time.Second) // XXX: temporary workaround
convergerStateFn := func(b bool) error {
var err error
if obj.ConvergedStatusFile != "" {
if obj.Flags.Debug {
log.Printf("Main: Converged status is: %t", b)
}
err = appendConvergedStatus(obj.ConvergedStatusFile, b)
}
// exit if we are using the converged timeout and we are the
// root node. otherwise, if we are a child node in a remote
// execution hierarchy, we should only notify our converged
// state and wait for the parent to trigger the exit.
if t := obj.ConvergedTimeout; t >= 0 {
if b && !obj.ConvergedTimeoutNoExit {
log.Printf("Main: Converged for %d seconds, exiting!", t)
obj.Exit(nil) // trigger an exit!
}
return err
}
// send our individual state into etcd for others to see
e := etcd.SetHostnameConverged(EmbdEtcd, hostname, b) // TODO: what should happen on error?
if err == nil {
return e
} else if e != nil {
err = multierr.Append(err, e) // list of errors
}
return err
}
if EmbdEtcd != nil {
converger.SetStateFn(convergerStateFn)
}
// implementation of the World API (alternates can be substituted in)
// implementation of the World API (alternatives can be substituted in)
world := &etcd.World{
Hostname: hostname,
EmbdEtcd: EmbdEtcd,
EmbdEtcd: embdEtcd,
MetadataPrefix: MetadataPrefix,
StoragePrefix: StoragePrefix,
StandaloneFs: obj.DeployFs, // used for static deploys
@@ -385,21 +393,26 @@ func (obj *Main) Run() error {
},
}
graph.Data = &resources.ResData{
Hostname: hostname,
Converger: converger,
Prometheus: prom,
World: world,
Prefix: pgraphPrefix,
Debug: obj.Flags.Debug,
obj.ge = &graph.Engine{
Program: obj.Program,
Hostname: hostname,
World: world,
Prefix: fmt.Sprintf("%s/", path.Join(prefix, "engine")),
Converger: converger,
//Prometheus: prom, // TODO: implement this via a general Status API
Debug: obj.Flags.Debug,
Logf: func(format string, v ...interface{}) {
log.Printf("resources: "+format, v...)
log.Printf("engine: "+format, v...)
},
}
exitchan := make(chan struct{}) // exit on close
wg := &sync.WaitGroup{} // waitgroup for inner loop (go routine)
if err := obj.ge.Init(); err != nil {
return errwrap.Wrapf(err, "engine: creation failed")
}
// After this point, the inner "main loop" must run, so that the engine
// can get closed with the deploy close via the deploy chan shutdown...
// main loop logic starts here
deployChan := make(chan *gapi.Deploy)
var gapiImpl gapi.GAPI // active GAPI implementation
gapiImpl = nil // starts off missing
@@ -408,47 +421,53 @@ func (obj *Main) Run() error {
gapiChan = nil // starts off blocked
wg.Add(1)
go func() {
defer Logf("loop: exited")
defer wg.Done()
first := true // first loop or not
started := false // track engine started state
var mainDeploy *gapi.Deploy
for {
log.Println("Main: Waiting...")
Logf("waiting...")
// The GAPI should always kick off an event on Next() at
// startup when (and if) it indeed has a graph to share!
fastPause := false
select {
case deploy, ok := <-deployChan:
if !ok { // channel closed
if obj.Flags.Debug {
log.Printf("Main: Deploy: exited")
}
Logf("deploy: exited")
deployChan = nil // disable it
if gapiImpl != nil { // currently running...
gapiChan = nil
if err := gapiImpl.Close(); err != nil {
err = errwrap.Wrapf(err, "the GAPI closed poorly")
log.Printf("Main: Deploy: GAPI: Final close failed: %+v", err)
err = errwrap.Wrapf(err, "the gapi closed poorly")
Logf("deploy: gapi: final close failed: %+v", err)
}
}
if started {
obj.ge.Pause(false)
}
// must be paused before this is run
obj.ge.Close()
return // this is the only place we exit
}
if deploy == nil {
log.Printf("Main: Deploy: Received empty deploy")
Logf("deploy: received empty deploy")
continue
}
mainDeploy = deploy // save this one
gapiObj := mainDeploy.GAPI
if gapiObj == nil {
log.Printf("Main: Deploy: Received empty GAPI")
Logf("deploy: received empty gapi")
continue
}
if gapiImpl != nil { // currently running...
gapiChan = nil
if err := gapiImpl.Close(); err != nil {
err = errwrap.Wrapf(err, "the GAPI closed poorly")
log.Printf("Main: Deploy: GAPI: Close failed: %+v", err)
err = errwrap.Wrapf(err, "the gapi closed poorly")
Logf("deploy: gapi: close failed: %+v", err)
}
}
gapiImpl = gapiObj // copy it to active
@@ -468,14 +487,14 @@ func (obj *Main) Run() error {
},
}
if obj.Flags.Debug {
log.Printf("Main: GAPI: Init...")
Logf("gapi: init...")
}
if err := gapiImpl.Init(data); err != nil {
log.Printf("Main: GAPI: Init failed: %+v", err)
Logf("gapi: init failed: %+v", err)
// TODO: consider running previous GAPI?
} else {
if obj.Flags.Debug {
log.Printf("Main: GAPI: Next...")
Logf("gapi: next...")
}
// this must generate at least one event for it to work
gapiChan = gapiImpl.Next() // stream of graph switch events!
@@ -485,7 +504,7 @@ func (obj *Main) Run() error {
case next, ok := <-gapiChan:
if !ok { // channel closed
if obj.Flags.Debug {
log.Printf("Main: GAPI exited")
Logf("gapi exited")
}
gapiChan = nil // disable it
continue
@@ -495,14 +514,14 @@ func (obj *Main) Run() error {
// TODO: do we want to block exits and wait?
// TODO: we might want to wait for the next GAPI
if next.Exit {
obj.Exit(next.Err) // trigger exit
continue // wait for exitchan
obj.exit.Done(next.Err) // trigger exit
continue // wait for exitchan
}
// the gapi lets us send an error to the channel
// this means there was a failure, but not fatal
if err := next.Err; err != nil {
log.Printf("Main: Error with graph stream: %v", err)
Logf("error with graph stream: %+v", err)
continue // wait for another event
}
// everything else passes through to cause a compile!
@@ -514,141 +533,135 @@ func (obj *Main) Run() error {
}
if gapiImpl == nil { // TODO: can this ever happen anymore?
log.Printf("Main: GAPI is empty!")
Logf("gapi is empty!")
continue
}
if first {
converger.Pause() // it's already started!
}
// we need the vertices to be paused to work on them, so
// run graph vertex LOCK...
if !first { // TODO: we can flatten this check out I think
converger.Pause() // FIXME: add sync wait?
graph.Pause(fastPause) // sync
//graph.UnGroup() // FIXME: implement me if needed!
}
// make the graph from yaml, lib, puppet->yaml, or dsl!
newGraph, err := gapiImpl.Graph() // generate graph!
if err != nil {
log.Printf("Main: Error creating new graph: %v", err)
// unpause!
if !first {
graph.Start(first) // sync
converger.Start() // after Start()
}
Logf("error creating new graph: %+v", err)
continue
}
if obj.Flags.Debug {
log.Printf("Main: New Graph: %v", newGraph)
Logf("new graph: %+v", newGraph)
}
// this edits the paused vertices, but it is safe to do
// so even if we don't use this new graph, since those
// value should be the same for existing vertices...
for _, v := range newGraph.Vertices() {
m := resources.VtoR(v).Meta()
// apply the global noop parameter if requested
if mainDeploy.Noop {
m.Noop = mainDeploy.Noop
}
// append the semaphore to each resource
if mainDeploy.Sema > 0 { // NOTE: size == 0 would block
// a semaphore with an empty id is valid
m.Sema = append(m.Sema, fmt.Sprintf(":%d", mainDeploy.Sema))
}
}
// We don't have to "UnGroup()" to compare, since we
// save the old graph to use when we compare.
// TODO: Does this hurt performance or graph changes ?
log.Printf("Main: GraphSync...")
vertexCmpFn := func(v1, v2 pgraph.Vertex) (bool, error) {
return resources.VtoR(v1).Compare(resources.VtoR(v2)), nil
}
vertexAddFn := func(v pgraph.Vertex) error {
err := resources.VtoR(v).Validate()
return errwrap.Wrapf(err, "could not Validate() resource")
}
vertexRemoveFn := func(v pgraph.Vertex) error {
// wait for exit before starting new graph!
resources.VtoR(v).Exit() // sync
return nil
}
edgeCmpFn := func(e1, e2 pgraph.Edge) (bool, error) {
edge1 := e1.(*resources.Edge) // panic if wrong
edge2 := e2.(*resources.Edge) // panic if wrong
return edge1.Compare(edge2), nil
}
// on success, this updates the receiver graph...
if err := oldGraph.GraphSync(newGraph, vertexCmpFn, vertexAddFn, vertexRemoveFn, edgeCmpFn); err != nil {
log.Printf("Main: Error running graph sync: %v", err)
// unpause!
if !first {
graph.Start(first) // sync
converger.Start() // after Start()
}
if err := obj.ge.Load(newGraph); err != nil { // copy in new graph
Logf("error copying in new graph: %+v", err)
continue
}
//savedGraph := oldGraph.Copy() // save a copy for errors
if err := obj.ge.Validate(); err != nil { // validate the new graph
obj.ge.Abort() // delete graph
Logf("error validating the new graph: %+v", err)
continue
}
// TODO: should we call each Res.Setup() here instead?
// apply the global metaparams to the graph
if err := obj.ge.Apply(func(graph *pgraph.Graph) error {
var err error
for _, v := range graph.Vertices() {
res, ok := v.(engine.Res)
if !ok {
e := fmt.Errorf("vertex `%s` is not a Res", v)
err = multierr.Append(err, e)
continue // we'll catch the error later!
}
m := res.MetaParams()
// apply the global noop parameter if requested
if mainDeploy.Noop {
m.Noop = mainDeploy.Noop
}
// append the semaphore to each resource
if mainDeploy.Sema > 0 { // NOTE: size == 0 would block
// a semaphore with an empty id is valid
m.Sema = append(m.Sema, fmt.Sprintf(":%d", mainDeploy.Sema))
}
}
return err
}); err != nil { // apply an operation to the new graph
obj.ge.Abort() // delete graph
Logf("error applying operation to the new graph: %+v", err)
continue
}
// XXX: can we change this into a ge.Apply operation?
// add autoedges; modifies the graph only if no error
if err := resources.AutoEdges(oldGraph); err != nil {
log.Printf("Main: Error running auto edges: %v", err)
// unpause!
if !first {
graph.Start(first) // sync
converger.Start() // after Start()
}
if err := obj.ge.AutoEdge(); err != nil {
obj.ge.Abort() // delete graph
Logf("error running auto edges: %+v", err)
continue
}
// at this point, any time we error after a destructive
// modification of the graph we need to restore the old
// graph that was previously running, eg:
//
// oldGraph = savedGraph.Copy()
//
// which we are (luckily) able to avoid testing for now
// XXX: can we change this into a ge.Apply operation?
// run autogroup; modifies the graph
if err := obj.ge.AutoGroup(&autogroup.NonReachabilityGrouper{}); err != nil {
obj.ge.Abort() // delete graph
Logf("error running auto grouping: %+v", err)
continue
}
resources.AutoGroup(oldGraph, &resources.NonReachabilityGrouper{}) // run autogroup; modifies the graph
// TODO: do we want to do a transitive reduction?
// FIXME: run a type checker that verifies all the send->recv relationships
graph.Update(oldGraph) // copy in structure of new graph
// Call this here because at this point the graph does
// not know anything about the prometheus instance.
if err := prom.UpdatePgraphStartTime(); err != nil {
log.Printf("Main: Prometheus.UpdatePgraphStartTime() errored: %v", err)
// we need the vertices to be paused to work on them, so
// run graph vertex LOCK...
if started { // TODO: we can flatten this check out I think
converger.Pause() // FIXME: add sync wait?
obj.ge.Pause(fastPause) // sync
started = false
}
// Start() needs to be synchronous or wait,
// because if half of the nodes are started and
// some are not ready yet and the EtcdWatch
// loops, we'll cause Pause() before we
// even got going, thus causing nil pointer errors
graph.Start(first) // sync
converger.Start() // after Start()
log.Printf("Main: Graph: %v", graph) // show graph
Logf("commit...")
if err := obj.ge.Commit(); err != nil {
// If we fail on commit, we have destructively
// destroyed the graph, so we must not run it.
// This graph isn't necessarily destroyed, but
// since an error is not expected here, we can
// either shutdown or wait for the next deploy.
obj.ge.Abort() // delete graph
Logf("error running commit: %+v", err)
// block gapi until a newDeploy comes in...
if gapiImpl != nil { // currently running...
gapiChan = nil
if err := gapiImpl.Close(); err != nil {
err = errwrap.Wrapf(err, "the gapi closed poorly")
Logf("deploy: gapi: close failed: %+v", err)
}
}
continue // stay paused
}
// Start needs to be synchronous because we don't want
// to loop around and cause a pause before we unpaused.
if err := obj.ge.Start(); err != nil { // sync
Logf("error starting graph: %+v", err)
continue
}
converger.Start() // after Start()
started = true
Logf("graph: %+v", obj.ge.Graph()) // show graph
if obj.Graphviz != "" {
filter := obj.GraphvizFilter
if filter == "" {
filter = "dot" // directed graph default
}
if err := graph.ExecGraphviz(filter, obj.Graphviz, hostname); err != nil {
log.Printf("Main: Graphviz: %v", err)
if err := obj.ge.Graph().ExecGraphviz(filter, obj.Graphviz, hostname); err != nil {
Logf("graphviz: %+v", err)
} else {
log.Printf("Main: Graphviz: Successfully generated graph!")
Logf("graphviz: successfully generated graph!")
}
}
first = false
// Call this here because at this point the graph does
// not know anything about the prometheus instance.
if err := prom.UpdatePgraphStartTime(); err != nil {
Logf("prometheus: UpdatePgraphStartTime() errored: %+v", err)
}
}
}()
@@ -667,7 +680,9 @@ func (obj *Main) Run() error {
// don't inline this, because when we close the deployChan it's
// the signal to tell the engine to actually shutdown...
wg.Add(1)
go func() {
defer wg.Done()
defer close(deployChan) // no more are coming ever!
select { // wait until we're ready to shutdown
case <-exitchan:
@@ -676,7 +691,9 @@ func (obj *Main) Run() error {
}()
} else {
// etcd based deploy
wg.Add(1)
go func() {
defer wg.Done()
defer close(deployChan)
startChan := make(chan struct{}) // start signal
close(startChan) // kick it off!
@@ -685,37 +702,56 @@ func (obj *Main) Run() error {
case <-startChan: // kick the loop once at start
startChan = nil // disable
case err, ok := <-etcd.WatchDeploy(EmbdEtcd):
case err, ok := <-etcd.WatchDeploy(embdEtcd):
if !ok {
obj.Exit(nil) // regular shutdown
obj.exit.Done(nil) // regular shutdown
return
}
if err != nil {
// TODO: it broke, can we restart?
obj.Exit(fmt.Errorf("Main: Deploy: Watch error"))
obj.exit.Done(fmt.Errorf("deploy: watch error"))
return
}
startChan = nil // disable it early...
case <-exitchan:
return
}
if obj.Flags.Debug {
log.Printf("Main: Deploy: Got activity")
Logf("deploy: got activity")
}
str, err := etcd.GetDeploy(EmbdEtcd, 0) // 0 means get the latest one
str, err := etcd.GetDeploy(embdEtcd, 0) // 0 means get the latest one
if err != nil {
log.Printf("Main: Deploy: Error getting deploy %+v", err)
Logf("deploy: error getting deploy: %+v", err)
continue
}
if str == "" { // no available deploys exist yet
// send an empty deploy... this is done
// to start up the engine so it can run
// an empty graph and be ready to swap!
Logf("deploy: empty")
deploy := &gapi.Deploy{
Name: empty.Name,
GAPI: &empty.GAPI{},
}
select {
case deployChan <- deploy:
// send
if obj.Flags.Debug {
Logf("deploy: sending empty deploy")
}
case <-exitchan:
return
}
continue
}
// decode the deploy (incl. GAPI) and send it!
deploy, err := gapi.NewDeployFromB64(str)
if err != nil {
log.Printf("Main: Deploy: Error decoding deploy %+v", err)
Logf("deploy: error decoding deploy: %+v", err)
continue
}
@@ -723,7 +759,7 @@ func (obj *Main) Run() error {
case deployChan <- deploy:
// send
if obj.Flags.Debug {
log.Printf("Main: Deploy: Sending new GAPI")
Logf("deploy: sending new gapi")
}
case <-exitchan:
@@ -733,38 +769,56 @@ func (obj *Main) Run() error {
}()
}
log.Println("Main: Running...")
Logf("running...")
reterr := <-obj.exit // wait for exit signal
reterr := obj.exit.Error() // wait for exit signal (block until arrival)
log.Println("Main: Destroy...")
Logf("destroy...")
// tell inner main loop to exit
close(exitchan)
wg.Wait()
graph.Exit() // tells all the children to exit, and waits for them to do so
// cleanup etcd main loop last so it can process everything first
if err := EmbdEtcd.Destroy(); err != nil { // shutdown and cleanup etcd
err = errwrap.Wrapf(err, "embedded Etcd exited poorly")
reterr = multierr.Append(reterr, err) // list of errors
if reterr != nil {
Logf("error: %+v", reterr)
}
return reterr
}
if obj.Prometheus {
log.Printf("Main: Prometheus: Stopping instance")
if err := prom.Stop(); err != nil {
err = errwrap.Wrapf(err, "the Prometheus instance exited poorly")
reterr = multierr.Append(reterr, err)
// Close contains a number of methods which must be run after the Run method.
// You must run them to properly clean up after the main program execution.
func (obj *Main) Close() error {
var err error
// run cleanup functions in reverse (defer) order
for i := len(obj.cleanup) - 1; i >= 0; i-- {
fn := obj.cleanup[i]
if e := fn(); e != nil {
err = multierr.Append(err, e) // list of errors
}
}
if obj.Flags.Debug {
log.Printf("Main: Graph: %v", graph)
}
if reterr != nil {
log.Printf("Main: Error: %v", reterr)
}
log.Println("Goodbye!")
return reterr
return err
}
// Exit causes a safe shutdown. This is often attached to the ^C signal handler.
func (obj *Main) Exit(err error) {
obj.exit.Done(err) // trigger an exit!
}
// FastExit causes a faster shutdown. This is often activated on the second ^C.
func (obj *Main) FastExit(err error) {
if obj.ge != nil {
obj.ge.SetFastPause()
}
obj.Exit(err)
}
// Interrupt causes the fastest shutdown. The only faster method is a kill -9
// which could cause corruption. This is often activated on the third ^C. This
// might leave some of your resources in a partial or unknown state.
func (obj *Main) Interrupt(err error) {
// XXX: implement and run Interrupt API for supported resources
obj.FastExit(err)
}