engine: Work around bad timestamp panic

Occasionally, a back poke from a downstream vertex could reach an upstream vertex which had already exited, and this would cause a panic. This defers the deletion of the state struct until after the entire graph sync has completed, so that it won't panic. It doesn't matter if a back poke is lost in this scenario, because we're shutting down or pausing.
2019-01-12 17:24:29 -05:00
parent e2296a631b
commit b808592fb3
1 changed files with 14 additions and 2 deletions
--- a/engine/graph/engine.go
+++ b/engine/graph/engine.go
@@ -194,6 +194,7 @@ func (obj *Engine) Commit() error {
 		}
 		return nil
 	}
+	free := []func() error{} // functions to run after graphsync to reset...
 	vertexRemoveFn := func(vertex pgraph.Vertex) error {
 		// wait for exit before starting new graph!
 		obj.state[vertex].Event(event.Exit) // signal an exit
@@ -206,8 +207,12 @@ func (obj *Engine) Commit() error {
 		}

 		// delete to free up memory from old graphs
-		delete(obj.state, vertex)
-		delete(obj.waits, vertex)
+		fn := func() error {
+			delete(obj.state, vertex)
+			delete(obj.waits, vertex)
+			return nil
+		}
+		free = append(free, fn) // do this at the end, so we don't panic
 		return nil
 	}

@@ -218,6 +223,13 @@ func (obj *Engine) Commit() error {
 	if err := obj.graph.GraphSync(obj.nextGraph, engine.VertexCmpFn, vertexAddFn, vertexRemoveFn, engine.EdgeCmpFn); err != nil {
 		return errwrap.Wrapf(err, "error running graph sync")
 	}
+	// we run these afterwards, so that the state structs (that might get
+	// referenced) aren't destroyed while someone might poke or use one.
+	for _, fn := range free {
+		if err := fn(); err != nil {
+			return errwrap.Wrapf(err, "error running free fn")
+		}
+	}
 	obj.nextGraph = nil

 	// After this point, we must not error or we'd need to restore all of