From 9f5057eac7bc5d241975045aa8e0328713a1bc1a Mon Sep 17 00:00:00 2001
From: James Shubin <james@shubin.ca>
Date: Wed, 7 Jun 2017 04:45:25 -0400
Subject: [PATCH] resources: Do not panic on autogrouped graph switches

Graph changes from autogrouped -> not autogrouped or vice versa cause a
panic (or I assume a leak) because we compared the auto grouped graph to
the ungrouped one, which would cause an Exit on an unstarted Vertex.
This includes a test that seems to reliably reproduces the issue.
---
 lib/main.go                   |  14 ++-
 test/shell/libmgmt-change1.sh |   5 +-
 test/shell/libmgmt-change2.go | 200 ++++++++++++++++++++++++++++++++++
 test/shell/libmgmt-change2.sh |  10 ++
 test/test-gometalinter.sh     |   2 +-
 test/test-gotest.sh           |   3 +-
 6 files changed, 228 insertions(+), 6 deletions(-)
 create mode 100644 test/shell/libmgmt-change2.go
 create mode 100755 test/shell/libmgmt-change2.sh

diff --git a/lib/main.go b/lib/main.go
index 92246b93..d8d1990b 100644
--- a/lib/main.go
+++ b/lib/main.go
@@ -524,6 +524,8 @@ func (obj *Main) Run() error {
 				continue
 			}
 
+			//savedGraph := oldGraph.Copy() // save a copy for errors
+
 			// TODO: should we call each Res.Setup() here instead?
 
 			// add autoedges; modifies the graph only if no error
@@ -537,12 +539,20 @@ func (obj *Main) Run() error {
 				continue
 			}
 
-			graph.Update(oldGraph) // copy in structure of new graph
+			// at this point, any time we error after a destructive
+			// modification of the graph we need to restore the old
+			// graph that was previously running, eg:
+			//
+			//	oldGraph = savedGraph.Copy()
+			//
+			// which we are (luckily) able to avoid testing for now
 
-			resources.AutoGroup(graph.Graph, &resources.NonReachabilityGrouper{}) // run autogroup; modifies the graph
+			resources.AutoGroup(oldGraph, &resources.NonReachabilityGrouper{}) // run autogroup; modifies the graph
 			// TODO: do we want to do a transitive reduction?
 			// FIXME: run a type checker that verifies all the send->recv relationships
 
+			graph.Update(oldGraph) // copy in structure of new graph
+
 			// Call this here because at this point the graph does
 			// not know anything about the prometheus instance.
 			if err := prom.UpdatePgraphStartTime(); err != nil {
diff --git a/test/shell/libmgmt-change1.sh b/test/shell/libmgmt-change1.sh
index 9ae003e1..3318617b 100755
--- a/test/shell/libmgmt-change1.sh
+++ b/test/shell/libmgmt-change1.sh
@@ -1,9 +1,10 @@
 #!/bin/bash -e
 
-go build libmgmt-change1.go
+go build -i -o libmgmt libmgmt-change1.go
 # this example should change graphs frequently, and then shutdown...
-$timeout --kill-after=30s 20s ./libmgmt-change1 &
+$timeout --kill-after=30s 20s ./libmgmt &
 pid=$!
 wait $pid	# get exit status
 e=$?
+rm libmgmt	# cleanup build artefact
 exit $e
diff --git a/test/shell/libmgmt-change2.go b/test/shell/libmgmt-change2.go
new file mode 100644
index 00000000..00964552
--- /dev/null
+++ b/test/shell/libmgmt-change2.go
@@ -0,0 +1,200 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"os/signal"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/purpleidea/mgmt/gapi"
+	mgmt "github.com/purpleidea/mgmt/lib"
+	"github.com/purpleidea/mgmt/pgraph"
+	"github.com/purpleidea/mgmt/resources"
+)
+
+// MyGAPI implements the main GAPI interface.
+type MyGAPI struct {
+	Name     string // graph name
+	Interval uint   // refresh interval, 0 to never refresh
+
+	flipflop  bool // flip flop
+	autoGroup bool
+
+	data        gapi.Data
+	initialized bool
+	closeChan   chan struct{}
+	wg          sync.WaitGroup // sync group for tunnel go routines
+}
+
+// NewMyGAPI creates a new MyGAPI struct and calls Init().
+func NewMyGAPI(data gapi.Data, name string, interval uint) (*MyGAPI, error) {
+	obj := &MyGAPI{
+		Name:     name,
+		Interval: interval,
+	}
+	return obj, obj.Init(data)
+}
+
+// Init initializes the MyGAPI struct.
+func (obj *MyGAPI) Init(data gapi.Data) error {
+	if obj.initialized {
+		return fmt.Errorf("already initialized")
+	}
+	if obj.Name == "" {
+		return fmt.Errorf("the graph name must be specified")
+	}
+
+	//obj.autoGroup = false // XXX: no panic
+	obj.autoGroup = true // XXX: causes panic!
+
+	obj.data = data // store for later
+	obj.closeChan = make(chan struct{})
+	obj.initialized = true
+	return nil
+}
+
+// Graph returns a current Graph.
+func (obj *MyGAPI) Graph() (*pgraph.Graph, error) {
+	if !obj.initialized {
+		return nil, fmt.Errorf("%s: MyGAPI is not initialized", obj.Name)
+	}
+
+	var err error
+	g, err := pgraph.NewGraph(obj.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	if !obj.flipflop {
+		n0, err := resources.NewResource("noop")
+		if err != nil {
+			return nil, err
+		}
+		n0.SetName("noop0")
+		g.AddVertex(n0)
+
+	} else {
+		// NOTE: these will get autogrouped
+		n1, err := resources.NewResource("noop")
+		if err != nil {
+			return nil, err
+		}
+		n1.SetName("noop1")
+		n1.Meta().AutoGroup = obj.autoGroup // enable or disable it
+		g.AddVertex(n1)
+
+		n2, err := resources.NewResource("noop")
+		if err != nil {
+			return nil, err
+		}
+		n2.SetName("noop2")
+		n2.Meta().AutoGroup = obj.autoGroup // enable or disable it
+		g.AddVertex(n2)
+	}
+	obj.flipflop = !obj.flipflop // flip the bool
+
+	//g, err := config.NewGraphFromConfig(obj.data.Hostname, obj.data.World, obj.data.Noop)
+	return g, nil
+}
+
+// Next returns nil errors every time there could be a new graph.
+func (obj *MyGAPI) Next() chan gapi.Next {
+	ch := make(chan gapi.Next)
+	obj.wg.Add(1)
+	go func() {
+		defer obj.wg.Done()
+		defer close(ch) // this will run before the obj.wg.Done()
+		if !obj.initialized {
+			next := gapi.Next{
+				Err:  fmt.Errorf("%s: MyGAPI is not initialized", obj.Name),
+				Exit: true, // exit, b/c programming error?
+			}
+			ch <- next
+			return
+		}
+
+		log.Printf("%s: Generating a bunch of new graphs...", obj.Name)
+		ch <- gapi.Next{}
+		log.Printf("%s: Second generation...", obj.Name)
+		ch <- gapi.Next{}
+		log.Printf("%s: Third generation...", obj.Name)
+		ch <- gapi.Next{}
+		time.Sleep(1 * time.Second)
+		log.Printf("%s: Done generating graphs!", obj.Name)
+	}()
+	return ch
+}
+
+// Close shuts down the MyGAPI.
+func (obj *MyGAPI) Close() error {
+	if !obj.initialized {
+		return fmt.Errorf("%s: MyGAPI is not initialized", obj.Name)
+	}
+	close(obj.closeChan)
+	obj.wg.Wait()
+	obj.initialized = false // closed = true
+	return nil
+}
+
+// Run runs an embedded mgmt server.
+func Run() error {
+
+	obj := &mgmt.Main{}
+	obj.Program = "libmgmt" // TODO: set on compilation
+	obj.Version = "0.0.1"   // TODO: set on compilation
+	obj.TmpPrefix = true
+	obj.IdealClusterSize = -1
+	obj.ConvergedTimeout = 5
+	obj.Noop = false // does stuff!
+
+	obj.GAPI = &MyGAPI{ // graph API
+		Name:     obj.Program, // graph name
+		Interval: 15,          // arbitrarily change graph every 15 seconds
+	}
+
+	if err := obj.Init(); err != nil {
+		return err
+	}
+
+	// install the exit signal handler
+	exit := make(chan struct{})
+	defer close(exit)
+	go func() {
+		signals := make(chan os.Signal, 1)
+		signal.Notify(signals, os.Interrupt) // catch ^C
+		//signal.Notify(signals, os.Kill) // catch signals
+		signal.Notify(signals, syscall.SIGTERM)
+
+		select {
+		case sig := <-signals: // any signal will do
+			if sig == os.Interrupt {
+				log.Println("Interrupted by ^C")
+				obj.Exit(nil)
+				return
+			}
+			log.Println("Interrupted by signal")
+			obj.Exit(fmt.Errorf("killed by %v", sig))
+			return
+		case <-exit:
+			return
+		}
+	}()
+
+	if err := obj.Run(); err != nil {
+		return err
+	}
+	return nil
+}
+
+func main() {
+	log.Printf("Hello!")
+	if err := Run(); err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+		return
+	}
+	log.Printf("Goodbye!")
+}
diff --git a/test/shell/libmgmt-change2.sh b/test/shell/libmgmt-change2.sh
new file mode 100755
index 00000000..b91da796
--- /dev/null
+++ b/test/shell/libmgmt-change2.sh
@@ -0,0 +1,10 @@
+#!/bin/bash -e
+
+go build -i -o libmgmt libmgmt-change2.go
+# this example should change graphs frequently, and then shutdown...
+$timeout --kill-after=30s 20s ./libmgmt &
+pid=$!
+wait $pid	# get exit status
+e=$?
+rm libmgmt	# cleanup build artefact
+exit $e
diff --git a/test/test-gometalinter.sh b/test/test-gometalinter.sh
index dcda5832..8df59850 100755
--- a/test/test-gometalinter.sh
+++ b/test/test-gometalinter.sh
@@ -43,7 +43,7 @@ gometalinter="$gml"
 
 # loop through directories in an attempt to scan each go package
 # TODO: lint the *.go examples as individual files and not as a single *.go
-for dir in `find . -maxdepth 5 -type d -not -path './old/*' -not -path './old' -not -path './tmp/*' -not -path './tmp' -not -path './.*' -not -path './vendor/*' -not -path './examples/*'`; do
+for dir in `find . -maxdepth 5 -type d -not -path './old/*' -not -path './old' -not -path './tmp/*' -not -path './tmp' -not -path './.*' -not -path './vendor/*' -not -path './examples/*' -not -path './test/*'`; do
 	match="$dir/*.go"
 	#echo "match is: $match"
 	if ! ls $match &>/dev/null; then
diff --git a/test/test-gotest.sh b/test/test-gotest.sh
index 3f97dbe4..d395efd5 100755
--- a/test/test-gotest.sh
+++ b/test/test-gotest.sh
@@ -13,7 +13,8 @@ function run-test()
 ROOT=$(dirname "${BASH_SOURCE}")/..
 cd "${ROOT}"
 
-for pkg in `go list ./... | grep -v 'vendor/' | grep -v 'examples/' | grep -v 'old/' | grep -v 'tmp/'`; do
+base=$(go list .)
+for pkg in `go list ./... | grep -v "^${base}/vendor/" | grep -v "^${base}/examples/" | grep -v "^${base}/test/" | grep -v "^${base}/old/" | grep -v "^${base}/tmp/"`; do
 	echo "Testing: $pkg"
 	# FIXME: can we capture and output the stderr from these tests too?
 	run-test go test "$pkg"