pgraph: Add fast pausing and exiting of graphs

This causes a graph to actually stop processing partway through, even
if there are pokes that want to continue on. This is so that pressing ^C
actually causes a shutdown without first finishing the graph execution.
It might be preferable to make this a user-defined setting at some point
in the future, for example triggered only if the user presses ^C twice.
Additionally, we might want to implement an interrupt API so that
individual resource execution can be asked to bail out early if
requested. This could happen on a third ^C press.
This commit is contained in:
James Shubin
2017-03-13 07:44:01 -04:00
parent 074da4da19
commit cd5e2e1148
7 changed files with 188 additions and 3 deletions

View File

@@ -421,7 +421,7 @@ func (obj *Main) Run() error {
// run graph vertex LOCK... // run graph vertex LOCK...
if !first { // TODO: we can flatten this check out I think if !first { // TODO: we can flatten this check out I think
converger.Pause() // FIXME: add sync wait? converger.Pause() // FIXME: add sync wait?
G.Pause() // sync G.Pause(false) // sync
//G.UnGroup() // FIXME: implement me if needed! //G.UnGroup() // FIXME: implement me if needed!
} }

View File

@@ -65,6 +65,14 @@ func (g *Graph) OKTimestamp(v *Vertex) bool {
// Poke tells nodes after me in the dependency graph that they need to refresh. // Poke tells nodes after me in the dependency graph that they need to refresh.
func (g *Graph) Poke(v *Vertex) error { func (g *Graph) Poke(v *Vertex) error {
// if we're pausing (or exiting) then we should suspend poke's so that
// the graph doesn't go on running forever until it's completely done!
// this is an optional feature which we can do by default on user exit
if g.fastPause {
return nil // TODO: should this be an error instead?
}
var wg sync.WaitGroup var wg sync.WaitGroup
// these are all the vertices pointing AWAY FROM v, eg: v -> ??? // these are all the vertices pointing AWAY FROM v, eg: v -> ???
for _, n := range g.OutgoingGraphVertices(v) { for _, n := range g.OutgoingGraphVertices(v) {
@@ -725,14 +733,20 @@ func (g *Graph) Start(first bool) { // start or continue
// we wait for everyone to start before exiting! // we wait for everyone to start before exiting!
} }
// Pause sends pause events to the graph in a topological sort order. // Pause sends pause events to the graph in a topological sort order. If you set
func (g *Graph) Pause() { // the fastPause argument to true, then it will ask future propagation waves to
// not run through the graph before exiting, and instead will exit much quicker.
func (g *Graph) Pause(fastPause bool) {
log.Printf("State: %v -> %v", g.setState(graphStatePausing), g.getState()) log.Printf("State: %v -> %v", g.setState(graphStatePausing), g.getState())
defer log.Printf("State: %v -> %v", g.setState(graphStatePaused), g.getState()) defer log.Printf("State: %v -> %v", g.setState(graphStatePaused), g.getState())
if fastPause {
g.fastPause = true // set flag
}
t, _ := g.TopologicalSort() t, _ := g.TopologicalSort()
for _, v := range t { // squeeze out the events... for _, v := range t { // squeeze out the events...
v.SendEvent(event.EventPause, nil) // sync v.SendEvent(event.EventPause, nil) // sync
} }
g.fastPause = false // reset flag
} }
// Exit sends exit events to the graph in a topological sort order. // Exit sends exit events to the graph in a topological sort order.
@@ -740,6 +754,10 @@ func (g *Graph) Exit() {
if g == nil { // empty graph that wasn't populated yet if g == nil { // empty graph that wasn't populated yet
return return
} }
// FIXME: a second ^C could put this into fast pause, but do it for now!
g.Pause(true) // implement this with pause to avoid duplicating the code
t, _ := g.TopologicalSort() t, _ := g.TopologicalSort()
for _, v := range t { // squeeze out the events... for _, v := range t { // squeeze out the events...
// turn off the taps... // turn off the taps...

View File

@@ -58,6 +58,7 @@ type Graph struct {
Adjacency map[*Vertex]map[*Vertex]*Edge // *Vertex -> *Vertex (edge) Adjacency map[*Vertex]map[*Vertex]*Edge // *Vertex -> *Vertex (edge)
Flags Flags Flags Flags
state graphState state graphState
fastPause bool // used to disable pokes for a fast pause
mutex *sync.Mutex // used when modifying graph State variable mutex *sync.Mutex // used when modifying graph State variable
wg *sync.WaitGroup wg *sync.WaitGroup
semas map[string]*semaphore.Semaphore semas map[string]*semaphore.Semaphore
@@ -129,6 +130,7 @@ func (g *Graph) Copy() *Graph {
wg: g.wg, wg: g.wg,
semas: g.semas, semas: g.semas,
slock: g.slock, slock: g.slock,
fastPause: g.fastPause,
prometheus: g.prometheus, prometheus: g.prometheus,
} }

10
test/shell/graph-exit1.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash -e
# Test for fast graph exit: start mgmt on graph-exit.yaml in the background,
# interrupt it while the first resources are still running, and hand the
# resulting exit status back to the caller.
# should take 15 seconds for longest resources plus startup time to shutdown
# we don't want the ^C to allow the rest of the graph to continue executing!
# NOTE(review): $timeout is not set anywhere in this script; presumably the
# test harness exports it (e.g. the path to timeout(1)) -- confirm. If it
# behaves like GNU timeout, the run is signalled after 25s and force-killed 35s
# after that, so a graph that keeps executing after the ^C is cut off rather
# than left to run to completion.
$timeout --kill-after=35s 25s ./mgmt run --yaml graph-exit.yaml --no-watch --no-pgp --tmp-prefix &
pid=$! # $! is the pid of the background job started above (the $timeout command line)
sleep 5s # let the initial resources start to run...
killall -SIGINT mgmt # send ^C to exit mgmt (signals every process named mgmt)
wait $pid # get exit status
exit $? # propagate the status that wait returned

View File

@@ -0,0 +1,71 @@
---
graph: parallel
resources:
exec:
- name: exec1
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
- name: exec2
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
- name: exec3
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
- name: exec4
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
- name: exec0
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
edges:
- name: e1
from:
kind: exec
name: exec1
to:
kind: exec
name: exec2
- name: e2
from:
kind: exec
name: exec2
to:
kind: exec
name: exec3
- name: e3
from:
kind: exec
name: exec3
to:
kind: exec
name: exec4

10
test/shell/graph-exit2.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash -e
# Test for fast graph exit: start mgmt in the background, interrupt it after
# the initial resources have been running for a while, and hand the resulting
# exit status back to the caller.
# should take 15 seconds for longest resources plus startup time to shutdown
# we don't want the ^C to allow the rest of the graph to continue executing!
# NOTE(review): $timeout is not set anywhere in this script; presumably the
# test harness exports it (e.g. the path to timeout(1)) -- confirm. If it
# behaves like GNU timeout, the run is signalled after 35s and force-killed 45s
# after that.
# NOTE(review): this loads graph-exit.yaml, the same file name graph-exit1.sh
# uses -- confirm that is the intended fixture for this second test.
$timeout --kill-after=45s 35s ./mgmt run --yaml graph-exit.yaml --no-watch --no-pgp --tmp-prefix &
pid=$! # $! is the pid of the background job started above (the $timeout command line)
sleep 10s # let the initial resources start to run...
killall -SIGINT mgmt # send ^C to exit mgmt (signals every process named mgmt)
wait $pid # get exit status
exit $? # propagate the status that wait returned

View File

@@ -0,0 +1,74 @@
---
graph: parallel
resources:
exec:
- name: exec1
meta:
retry: 10
delay: 1000
cmd: 'sleep 5s && exit 13'
shell: '/bin/bash'
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
- name: exec2
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
- name: exec3
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
- name: exec4
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
- name: exec0
cmd: sleep 15s
shell: ''
timeout: 0
watchcmd: ''
watchshell: ''
ifcmd: ''
ifshell: ''
state: present
edges:
- name: e1
from:
kind: exec
name: exec1
to:
kind: exec
name: exec2
- name: e2
from:
kind: exec
name: exec2
to:
kind: exec
name: exec3
- name: e3
from:
kind: exec
name: exec3
to:
kind: exec
name: exec4