resources: Do not panic on autogrouped graph switches
Graph changes from autogrouped -> not autogrouped or vice versa cause a panic (or I assume a leak) because we compared the auto grouped graph to the ungrouped one, which would cause an Exit on an unstarted Vertex. This includes a test that seems to reliably reproduces the issue.
This commit is contained in:
14
lib/main.go
14
lib/main.go
@@ -524,6 +524,8 @@ func (obj *Main) Run() error {
|
||||
continue
|
||||
}
|
||||
|
||||
//savedGraph := oldGraph.Copy() // save a copy for errors
|
||||
|
||||
// TODO: should we call each Res.Setup() here instead?
|
||||
|
||||
// add autoedges; modifies the graph only if no error
|
||||
@@ -537,12 +539,20 @@ func (obj *Main) Run() error {
|
||||
continue
|
||||
}
|
||||
|
||||
graph.Update(oldGraph) // copy in structure of new graph
|
||||
// at this point, any time we error after a destructive
|
||||
// modification of the graph we need to restore the old
|
||||
// graph that was previously running, eg:
|
||||
//
|
||||
// oldGraph = savedGraph.Copy()
|
||||
//
|
||||
// which we are (luckily) able to avoid testing for now
|
||||
|
||||
resources.AutoGroup(graph.Graph, &resources.NonReachabilityGrouper{}) // run autogroup; modifies the graph
|
||||
resources.AutoGroup(oldGraph, &resources.NonReachabilityGrouper{}) // run autogroup; modifies the graph
|
||||
// TODO: do we want to do a transitive reduction?
|
||||
// FIXME: run a type checker that verifies all the send->recv relationships
|
||||
|
||||
graph.Update(oldGraph) // copy in structure of new graph
|
||||
|
||||
// Call this here because at this point the graph does
|
||||
// not know anything about the prometheus instance.
|
||||
if err := prom.UpdatePgraphStartTime(); err != nil {
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
go build libmgmt-change1.go
|
||||
go build -i -o libmgmt libmgmt-change1.go
|
||||
# this example should change graphs frequently, and then shutdown...
|
||||
$timeout --kill-after=30s 20s ./libmgmt-change1 &
|
||||
$timeout --kill-after=30s 20s ./libmgmt &
|
||||
pid=$!
|
||||
wait $pid # get exit status
|
||||
e=$?
|
||||
rm libmgmt # cleanup build artefact
|
||||
exit $e
|
||||
|
||||
200
test/shell/libmgmt-change2.go
Normal file
200
test/shell/libmgmt-change2.go
Normal file
@@ -0,0 +1,200 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/purpleidea/mgmt/gapi"
|
||||
mgmt "github.com/purpleidea/mgmt/lib"
|
||||
"github.com/purpleidea/mgmt/pgraph"
|
||||
"github.com/purpleidea/mgmt/resources"
|
||||
)
|
||||
|
||||
// MyGAPI implements the main GAPI interface.
|
||||
type MyGAPI struct {
|
||||
Name string // graph name
|
||||
Interval uint // refresh interval, 0 to never refresh
|
||||
|
||||
flipflop bool // flip flop
|
||||
autoGroup bool
|
||||
|
||||
data gapi.Data
|
||||
initialized bool
|
||||
closeChan chan struct{}
|
||||
wg sync.WaitGroup // sync group for tunnel go routines
|
||||
}
|
||||
|
||||
// NewMyGAPI creates a new MyGAPI struct and calls Init().
|
||||
func NewMyGAPI(data gapi.Data, name string, interval uint) (*MyGAPI, error) {
|
||||
obj := &MyGAPI{
|
||||
Name: name,
|
||||
Interval: interval,
|
||||
}
|
||||
return obj, obj.Init(data)
|
||||
}
|
||||
|
||||
// Init initializes the MyGAPI struct.
|
||||
func (obj *MyGAPI) Init(data gapi.Data) error {
|
||||
if obj.initialized {
|
||||
return fmt.Errorf("already initialized")
|
||||
}
|
||||
if obj.Name == "" {
|
||||
return fmt.Errorf("the graph name must be specified")
|
||||
}
|
||||
|
||||
//obj.autoGroup = false // XXX: no panic
|
||||
obj.autoGroup = true // XXX: causes panic!
|
||||
|
||||
obj.data = data // store for later
|
||||
obj.closeChan = make(chan struct{})
|
||||
obj.initialized = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Graph returns a current Graph.
|
||||
func (obj *MyGAPI) Graph() (*pgraph.Graph, error) {
|
||||
if !obj.initialized {
|
||||
return nil, fmt.Errorf("%s: MyGAPI is not initialized", obj.Name)
|
||||
}
|
||||
|
||||
var err error
|
||||
g, err := pgraph.NewGraph(obj.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !obj.flipflop {
|
||||
n0, err := resources.NewResource("noop")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
n0.SetName("noop0")
|
||||
g.AddVertex(n0)
|
||||
|
||||
} else {
|
||||
// NOTE: these will get autogrouped
|
||||
n1, err := resources.NewResource("noop")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
n1.SetName("noop1")
|
||||
n1.Meta().AutoGroup = obj.autoGroup // enable or disable it
|
||||
g.AddVertex(n1)
|
||||
|
||||
n2, err := resources.NewResource("noop")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
n2.SetName("noop2")
|
||||
n2.Meta().AutoGroup = obj.autoGroup // enable or disable it
|
||||
g.AddVertex(n2)
|
||||
}
|
||||
obj.flipflop = !obj.flipflop // flip the bool
|
||||
|
||||
//g, err := config.NewGraphFromConfig(obj.data.Hostname, obj.data.World, obj.data.Noop)
|
||||
return g, nil
|
||||
}
|
||||
|
||||
// Next returns nil errors every time there could be a new graph.
|
||||
func (obj *MyGAPI) Next() chan gapi.Next {
|
||||
ch := make(chan gapi.Next)
|
||||
obj.wg.Add(1)
|
||||
go func() {
|
||||
defer obj.wg.Done()
|
||||
defer close(ch) // this will run before the obj.wg.Done()
|
||||
if !obj.initialized {
|
||||
next := gapi.Next{
|
||||
Err: fmt.Errorf("%s: MyGAPI is not initialized", obj.Name),
|
||||
Exit: true, // exit, b/c programming error?
|
||||
}
|
||||
ch <- next
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("%s: Generating a bunch of new graphs...", obj.Name)
|
||||
ch <- gapi.Next{}
|
||||
log.Printf("%s: Second generation...", obj.Name)
|
||||
ch <- gapi.Next{}
|
||||
log.Printf("%s: Third generation...", obj.Name)
|
||||
ch <- gapi.Next{}
|
||||
time.Sleep(1 * time.Second)
|
||||
log.Printf("%s: Done generating graphs!", obj.Name)
|
||||
}()
|
||||
return ch
|
||||
}
|
||||
|
||||
// Close shuts down the MyGAPI.
|
||||
func (obj *MyGAPI) Close() error {
|
||||
if !obj.initialized {
|
||||
return fmt.Errorf("%s: MyGAPI is not initialized", obj.Name)
|
||||
}
|
||||
close(obj.closeChan)
|
||||
obj.wg.Wait()
|
||||
obj.initialized = false // closed = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Run runs an embedded mgmt server.
|
||||
func Run() error {
|
||||
|
||||
obj := &mgmt.Main{}
|
||||
obj.Program = "libmgmt" // TODO: set on compilation
|
||||
obj.Version = "0.0.1" // TODO: set on compilation
|
||||
obj.TmpPrefix = true
|
||||
obj.IdealClusterSize = -1
|
||||
obj.ConvergedTimeout = 5
|
||||
obj.Noop = false // does stuff!
|
||||
|
||||
obj.GAPI = &MyGAPI{ // graph API
|
||||
Name: obj.Program, // graph name
|
||||
Interval: 15, // arbitrarily change graph every 15 seconds
|
||||
}
|
||||
|
||||
if err := obj.Init(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// install the exit signal handler
|
||||
exit := make(chan struct{})
|
||||
defer close(exit)
|
||||
go func() {
|
||||
signals := make(chan os.Signal, 1)
|
||||
signal.Notify(signals, os.Interrupt) // catch ^C
|
||||
//signal.Notify(signals, os.Kill) // catch signals
|
||||
signal.Notify(signals, syscall.SIGTERM)
|
||||
|
||||
select {
|
||||
case sig := <-signals: // any signal will do
|
||||
if sig == os.Interrupt {
|
||||
log.Println("Interrupted by ^C")
|
||||
obj.Exit(nil)
|
||||
return
|
||||
}
|
||||
log.Println("Interrupted by signal")
|
||||
obj.Exit(fmt.Errorf("killed by %v", sig))
|
||||
return
|
||||
case <-exit:
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
if err := obj.Run(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
log.Printf("Hello!")
|
||||
if err := Run(); err != nil {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
return
|
||||
}
|
||||
log.Printf("Goodbye!")
|
||||
}
|
||||
10
test/shell/libmgmt-change2.sh
Executable file
10
test/shell/libmgmt-change2.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
go build -i -o libmgmt libmgmt-change2.go
|
||||
# this example should change graphs frequently, and then shutdown...
|
||||
$timeout --kill-after=30s 20s ./libmgmt &
|
||||
pid=$!
|
||||
wait $pid # get exit status
|
||||
e=$?
|
||||
rm libmgmt # cleanup build artefact
|
||||
exit $e
|
||||
@@ -43,7 +43,7 @@ gometalinter="$gml"
|
||||
|
||||
# loop through directories in an attempt to scan each go package
|
||||
# TODO: lint the *.go examples as individual files and not as a single *.go
|
||||
for dir in `find . -maxdepth 5 -type d -not -path './old/*' -not -path './old' -not -path './tmp/*' -not -path './tmp' -not -path './.*' -not -path './vendor/*' -not -path './examples/*'`; do
|
||||
for dir in `find . -maxdepth 5 -type d -not -path './old/*' -not -path './old' -not -path './tmp/*' -not -path './tmp' -not -path './.*' -not -path './vendor/*' -not -path './examples/*' -not -path './test/*'`; do
|
||||
match="$dir/*.go"
|
||||
#echo "match is: $match"
|
||||
if ! ls $match &>/dev/null; then
|
||||
|
||||
@@ -13,7 +13,8 @@ function run-test()
|
||||
ROOT=$(dirname "${BASH_SOURCE}")/..
|
||||
cd "${ROOT}"
|
||||
|
||||
for pkg in `go list ./... | grep -v 'vendor/' | grep -v 'examples/' | grep -v 'old/' | grep -v 'tmp/'`; do
|
||||
base=$(go list .)
|
||||
for pkg in `go list ./... | grep -v "^${base}/vendor/" | grep -v "^${base}/examples/" | grep -v "^${base}/test/" | grep -v "^${base}/old/" | grep -v "^${base}/tmp/"`; do
|
||||
echo "Testing: $pkg"
|
||||
# FIXME: can we capture and output the stderr from these tests too?
|
||||
run-test go test "$pkg"
|
||||
|
||||
Reference in New Issue
Block a user