From cd5e2e1148cea7331a91725a263e3199d563c6f5 Mon Sep 17 00:00:00 2001 From: James Shubin Date: Mon, 13 Mar 2017 07:44:01 -0400 Subject: [PATCH] pgraph: Add fast pausing and exiting of graphs This causes a graph to actually stop processing part way through, even if there are pokes that want to continue on. This is so that the user experience of pressing ^C actually causes a shutdown without finishing the graph execution. It might be preferred to have this be a user-defined setting at some point in the future, such as if the user presses ^C twice. As well, we might want to implement an interrupt API so that individual resource execution can be asked to bail out early if requested. This could happen on a third ^C press. --- lib/main.go | 2 +- pgraph/actions.go | 22 ++++++++++- pgraph/pgraph.go | 2 + test/shell/graph-exit1.sh | 10 +++++ test/shell/graph-exit1.yaml | 71 +++++++++++++++++++++++++++++++++++ test/shell/graph-exit2.sh | 10 +++++ test/shell/graph-exit2.yaml | 74 +++++++++++++++++++++++++++++++++++++ 7 files changed, 188 insertions(+), 3 deletions(-) create mode 100755 test/shell/graph-exit1.sh create mode 100644 test/shell/graph-exit1.yaml create mode 100755 test/shell/graph-exit2.sh create mode 100644 test/shell/graph-exit2.yaml diff --git a/lib/main.go b/lib/main.go index 7a0f762a..c800657a 100644 --- a/lib/main.go +++ b/lib/main.go @@ -421,7 +421,7 @@ func (obj *Main) Run() error { // run graph vertex LOCK... if !first { // TODO: we can flatten this check out I think converger.Pause() // FIXME: add sync wait? - G.Pause() // sync + G.Pause(false) // sync //G.UnGroup() // FIXME: implement me if needed! } diff --git a/pgraph/actions.go b/pgraph/actions.go index 47bde1d5..7a1a2d13 100644 --- a/pgraph/actions.go +++ b/pgraph/actions.go @@ -65,6 +65,14 @@ func (g *Graph) OKTimestamp(v *Vertex) bool { // Poke tells nodes after me in the dependency graph that they need to refresh.
func (g *Graph) Poke(v *Vertex) error { + + // if we're pausing (or exiting) then we should suspend poke's so that + // the graph doesn't go on running forever until it's completely done! + // this is an optional feature which we can do by default on user exit + if g.fastPause { + return nil // TODO: should this be an error instead? + } + var wg sync.WaitGroup // these are all the vertices pointing AWAY FROM v, eg: v -> ??? for _, n := range g.OutgoingGraphVertices(v) { @@ -725,14 +733,20 @@ func (g *Graph) Start(first bool) { // start or continue // we wait for everyone to start before exiting! } -// Pause sends pause events to the graph in a topological sort order. -func (g *Graph) Pause() { +// Pause sends pause events to the graph in a topological sort order. If you set +// the fastPause argument to true, then it will ask future propagation waves to +// not run through the graph before exiting, and instead will exit much quicker. +func (g *Graph) Pause(fastPause bool) { log.Printf("State: %v -> %v", g.setState(graphStatePausing), g.getState()) defer log.Printf("State: %v -> %v", g.setState(graphStatePaused), g.getState()) + if fastPause { + g.fastPause = true // set flag + } t, _ := g.TopologicalSort() for _, v := range t { // squeeze out the events... v.SendEvent(event.EventPause, nil) // sync } + g.fastPause = false // reset flag } // Exit sends exit events to the graph in a topological sort order. @@ -740,6 +754,10 @@ func (g *Graph) Exit() { if g == nil { // empty graph that wasn't populated yet return } + + // FIXME: a second ^C could put this into fast pause, but do it for now! + g.Pause(true) // implement this with pause to avoid duplicating the code + t, _ := g.TopologicalSort() for _, v := range t { // squeeze out the events... // turn off the taps... 
diff --git a/pgraph/pgraph.go b/pgraph/pgraph.go index 295c3f2a..945602ea 100644 --- a/pgraph/pgraph.go +++ b/pgraph/pgraph.go @@ -58,6 +58,7 @@ type Graph struct { Adjacency map[*Vertex]map[*Vertex]*Edge // *Vertex -> *Vertex (edge) Flags Flags state graphState + fastPause bool // used to disable pokes for a fast pause mutex *sync.Mutex // used when modifying graph State variable wg *sync.WaitGroup semas map[string]*semaphore.Semaphore @@ -129,6 +130,7 @@ func (g *Graph) Copy() *Graph { wg: g.wg, semas: g.semas, slock: g.slock, + fastPause: g.fastPause, prometheus: g.prometheus, } diff --git a/test/shell/graph-exit1.sh b/test/shell/graph-exit1.sh new file mode 100755 index 00000000..cb2d43cf --- /dev/null +++ b/test/shell/graph-exit1.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e + +# should take 15 seconds for longest resources plus startup time to shutdown +# we don't want the ^C to allow the rest of the graph to continue executing! +$timeout --kill-after=35s 25s ./mgmt run --yaml graph-exit1.yaml --no-watch --no-pgp --tmp-prefix & +pid=$! +sleep 5s # let the initial resources start to run... +killall -SIGINT mgmt # send ^C to exit mgmt +wait $pid # get exit status +exit $?
diff --git a/test/shell/graph-exit1.yaml b/test/shell/graph-exit1.yaml new file mode 100644 index 00000000..8583a1cc --- /dev/null +++ b/test/shell/graph-exit1.yaml @@ -0,0 +1,71 @@ +--- +graph: parallel +resources: + exec: + - name: exec1 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present + - name: exec2 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present + - name: exec3 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present + - name: exec4 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present + - name: exec0 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present +edges: +- name: e1 + from: + kind: exec + name: exec1 + to: + kind: exec + name: exec2 +- name: e2 + from: + kind: exec + name: exec2 + to: + kind: exec + name: exec3 +- name: e3 + from: + kind: exec + name: exec3 + to: + kind: exec + name: exec4 diff --git a/test/shell/graph-exit2.sh b/test/shell/graph-exit2.sh new file mode 100755 index 00000000..a04f7998 --- /dev/null +++ b/test/shell/graph-exit2.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e + +# should take 15 seconds for longest resources plus startup time to shutdown +# we don't want the ^C to allow the rest of the graph to continue executing! +$timeout --kill-after=45s 35s ./mgmt run --yaml graph-exit2.yaml --no-watch --no-pgp --tmp-prefix & +pid=$! +sleep 10s # let the initial resources start to run... +killall -SIGINT mgmt # send ^C to exit mgmt +wait $pid # get exit status +exit $?
diff --git a/test/shell/graph-exit2.yaml b/test/shell/graph-exit2.yaml new file mode 100644 index 00000000..196ea858 --- /dev/null +++ b/test/shell/graph-exit2.yaml @@ -0,0 +1,74 @@ +--- +graph: parallel +resources: + exec: + - name: exec1 + meta: + retry: 10 + delay: 1000 + cmd: 'sleep 5s && exit 13' + shell: '/bin/bash' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present + - name: exec2 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present + - name: exec3 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present + - name: exec4 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present + - name: exec0 + cmd: sleep 15s + shell: '' + timeout: 0 + watchcmd: '' + watchshell: '' + ifcmd: '' + ifshell: '' + state: present +edges: +- name: e1 + from: + kind: exec + name: exec1 + to: + kind: exec + name: exec2 +- name: e2 + from: + kind: exec + name: exec2 + to: + kind: exec + name: exec3 +- name: e3 + from: + kind: exec + name: exec3 + to: + kind: exec + name: exec4