From 9f5057eac7bc5d241975045aa8e0328713a1bc1a Mon Sep 17 00:00:00 2001 From: James Shubin Date: Wed, 7 Jun 2017 04:45:25 -0400 Subject: [PATCH] resources: Do not panic on autogrouped graph switches Graph changes from autogrouped -> not autogrouped or vice versa cause a panic (or I assume a leak) because we compared the auto grouped graph to the ungrouped one, which would cause an Exit on an unstarted Vertex. This includes a test that seems to reliably reproduces the issue. --- lib/main.go | 14 ++- test/shell/libmgmt-change1.sh | 5 +- test/shell/libmgmt-change2.go | 200 ++++++++++++++++++++++++++++++++++ test/shell/libmgmt-change2.sh | 10 ++ test/test-gometalinter.sh | 2 +- test/test-gotest.sh | 3 +- 6 files changed, 228 insertions(+), 6 deletions(-) create mode 100644 test/shell/libmgmt-change2.go create mode 100755 test/shell/libmgmt-change2.sh diff --git a/lib/main.go b/lib/main.go index 92246b93..d8d1990b 100644 --- a/lib/main.go +++ b/lib/main.go @@ -524,6 +524,8 @@ func (obj *Main) Run() error { continue } + //savedGraph := oldGraph.Copy() // save a copy for errors + // TODO: should we call each Res.Setup() here instead? // add autoedges; modifies the graph only if no error @@ -537,12 +539,20 @@ func (obj *Main) Run() error { continue } - graph.Update(oldGraph) // copy in structure of new graph + // at this point, any time we error after a destructive + // modification of the graph we need to restore the old + // graph that was previously running, eg: + // + // oldGraph = savedGraph.Copy() + // + // which we are (luckily) able to avoid testing for now - resources.AutoGroup(graph.Graph, &resources.NonReachabilityGrouper{}) // run autogroup; modifies the graph + resources.AutoGroup(oldGraph, &resources.NonReachabilityGrouper{}) // run autogroup; modifies the graph // TODO: do we want to do a transitive reduction? // FIXME: run a type checker that verifies all the send->recv relationships + graph.Update(oldGraph) // copy in structure of new graph + // Call this here because at this point the graph does // not know anything about the prometheus instance. if err := prom.UpdatePgraphStartTime(); err != nil { diff --git a/test/shell/libmgmt-change1.sh b/test/shell/libmgmt-change1.sh index 9ae003e1..3318617b 100755 --- a/test/shell/libmgmt-change1.sh +++ b/test/shell/libmgmt-change1.sh @@ -1,9 +1,10 @@ #!/bin/bash -e -go build libmgmt-change1.go +go build -i -o libmgmt libmgmt-change1.go # this example should change graphs frequently, and then shutdown... -$timeout --kill-after=30s 20s ./libmgmt-change1 & +$timeout --kill-after=30s 20s ./libmgmt & pid=$! wait $pid # get exit status e=$? +rm libmgmt # cleanup build artefact exit $e diff --git a/test/shell/libmgmt-change2.go b/test/shell/libmgmt-change2.go new file mode 100644 index 00000000..00964552 --- /dev/null +++ b/test/shell/libmgmt-change2.go @@ -0,0 +1,200 @@ +package main + +import ( + "fmt" + "log" + "os" + "os/signal" + "sync" + "syscall" + "time" + + "github.com/purpleidea/mgmt/gapi" + mgmt "github.com/purpleidea/mgmt/lib" + "github.com/purpleidea/mgmt/pgraph" + "github.com/purpleidea/mgmt/resources" +) + +// MyGAPI implements the main GAPI interface. +type MyGAPI struct { + Name string // graph name + Interval uint // refresh interval, 0 to never refresh + + flipflop bool // flip flop + autoGroup bool + + data gapi.Data + initialized bool + closeChan chan struct{} + wg sync.WaitGroup // sync group for tunnel go routines +} + +// NewMyGAPI creates a new MyGAPI struct and calls Init(). +func NewMyGAPI(data gapi.Data, name string, interval uint) (*MyGAPI, error) { + obj := &MyGAPI{ + Name: name, + Interval: interval, + } + return obj, obj.Init(data) +} + +// Init initializes the MyGAPI struct. +func (obj *MyGAPI) Init(data gapi.Data) error { + if obj.initialized { + return fmt.Errorf("already initialized") + } + if obj.Name == "" { + return fmt.Errorf("the graph name must be specified") + } + + //obj.autoGroup = false // XXX: no panic + obj.autoGroup = true // XXX: causes panic! + + obj.data = data // store for later + obj.closeChan = make(chan struct{}) + obj.initialized = true + return nil +} + +// Graph returns a current Graph. +func (obj *MyGAPI) Graph() (*pgraph.Graph, error) { + if !obj.initialized { + return nil, fmt.Errorf("%s: MyGAPI is not initialized", obj.Name) + } + + var err error + g, err := pgraph.NewGraph(obj.Name) + if err != nil { + return nil, err + } + + if !obj.flipflop { + n0, err := resources.NewResource("noop") + if err != nil { + return nil, err + } + n0.SetName("noop0") + g.AddVertex(n0) + + } else { + // NOTE: these will get autogrouped + n1, err := resources.NewResource("noop") + if err != nil { + return nil, err + } + n1.SetName("noop1") + n1.Meta().AutoGroup = obj.autoGroup // enable or disable it + g.AddVertex(n1) + + n2, err := resources.NewResource("noop") + if err != nil { + return nil, err + } + n2.SetName("noop2") + n2.Meta().AutoGroup = obj.autoGroup // enable or disable it + g.AddVertex(n2) + } + obj.flipflop = !obj.flipflop // flip the bool + + //g, err := config.NewGraphFromConfig(obj.data.Hostname, obj.data.World, obj.data.Noop) + return g, nil +} + +// Next returns nil errors every time there could be a new graph. +func (obj *MyGAPI) Next() chan gapi.Next { + ch := make(chan gapi.Next) + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + defer close(ch) // this will run before the obj.wg.Done() + if !obj.initialized { + next := gapi.Next{ + Err: fmt.Errorf("%s: MyGAPI is not initialized", obj.Name), + Exit: true, // exit, b/c programming error? + } + ch <- next + return + } + + log.Printf("%s: Generating a bunch of new graphs...", obj.Name) + ch <- gapi.Next{} + log.Printf("%s: Second generation...", obj.Name) + ch <- gapi.Next{} + log.Printf("%s: Third generation...", obj.Name) + ch <- gapi.Next{} + time.Sleep(1 * time.Second) + log.Printf("%s: Done generating graphs!", obj.Name) + }() + return ch +} + +// Close shuts down the MyGAPI. +func (obj *MyGAPI) Close() error { + if !obj.initialized { + return fmt.Errorf("%s: MyGAPI is not initialized", obj.Name) + } + close(obj.closeChan) + obj.wg.Wait() + obj.initialized = false // closed = true + return nil +} + +// Run runs an embedded mgmt server. +func Run() error { + + obj := &mgmt.Main{} + obj.Program = "libmgmt" // TODO: set on compilation + obj.Version = "0.0.1" // TODO: set on compilation + obj.TmpPrefix = true + obj.IdealClusterSize = -1 + obj.ConvergedTimeout = 5 + obj.Noop = false // does stuff! + + obj.GAPI = &MyGAPI{ // graph API + Name: obj.Program, // graph name + Interval: 15, // arbitrarily change graph every 15 seconds + } + + if err := obj.Init(); err != nil { + return err + } + + // install the exit signal handler + exit := make(chan struct{}) + defer close(exit) + go func() { + signals := make(chan os.Signal, 1) + signal.Notify(signals, os.Interrupt) // catch ^C + //signal.Notify(signals, os.Kill) // catch signals + signal.Notify(signals, syscall.SIGTERM) + + select { + case sig := <-signals: // any signal will do + if sig == os.Interrupt { + log.Println("Interrupted by ^C") + obj.Exit(nil) + return + } + log.Println("Interrupted by signal") + obj.Exit(fmt.Errorf("killed by %v", sig)) + return + case <-exit: + return + } + }() + + if err := obj.Run(); err != nil { + return err + } + return nil +} + +func main() { + log.Printf("Hello!") + if err := Run(); err != nil { + fmt.Println(err) + os.Exit(1) + return + } + log.Printf("Goodbye!") +} diff --git a/test/shell/libmgmt-change2.sh b/test/shell/libmgmt-change2.sh new file mode 100755 index 00000000..b91da796 --- /dev/null +++ b/test/shell/libmgmt-change2.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e + +go build -i -o libmgmt libmgmt-change2.go +# this example should change graphs frequently, and then shutdown... +$timeout --kill-after=30s 20s ./libmgmt & +pid=$! +wait $pid # get exit status +e=$? +rm libmgmt # cleanup build artefact +exit $e diff --git a/test/test-gometalinter.sh b/test/test-gometalinter.sh index dcda5832..8df59850 100755 --- a/test/test-gometalinter.sh +++ b/test/test-gometalinter.sh @@ -43,7 +43,7 @@ gometalinter="$gml" # loop through directories in an attempt to scan each go package # TODO: lint the *.go examples as individual files and not as a single *.go -for dir in `find . -maxdepth 5 -type d -not -path './old/*' -not -path './old' -not -path './tmp/*' -not -path './tmp' -not -path './.*' -not -path './vendor/*' -not -path './examples/*'`; do +for dir in `find . -maxdepth 5 -type d -not -path './old/*' -not -path './old' -not -path './tmp/*' -not -path './tmp' -not -path './.*' -not -path './vendor/*' -not -path './examples/*' -not -path './test/*'`; do match="$dir/*.go" #echo "match is: $match" if ! ls $match &>/dev/null; then diff --git a/test/test-gotest.sh b/test/test-gotest.sh index 3f97dbe4..d395efd5 100755 --- a/test/test-gotest.sh +++ b/test/test-gotest.sh @@ -13,7 +13,8 @@ function run-test() ROOT=$(dirname "${BASH_SOURCE}")/.. cd "${ROOT}" -for pkg in `go list ./... | grep -v 'vendor/' | grep -v 'examples/' | grep -v 'old/' | grep -v 'tmp/'`; do +base=$(go list .) +for pkg in `go list ./... | grep -v "^${base}/vendor/" | grep -v "^${base}/examples/" | grep -v "^${base}/test/" | grep -v "^${base}/old/" | grep -v "^${base}/tmp/"`; do echo "Testing: $pkg" # FIXME: can we capture and output the stderr from these tests too? run-test go test "$pkg"