From 652b6578091c0823eeedf11b88ca96861ef12522 Mon Sep 17 00:00:00 2001 From: James Shubin Date: Sun, 24 Feb 2019 11:37:25 -0500 Subject: [PATCH] resources: exec: Avoid possible deadlock race Some of the early code I wrote probably wouldn't pass my own reviews today. Here's one example of a rare deadlock that could sometimes occur when a Process/CheckApply caused a shutdown, but the bufio tried to send on a channel that nobody was going to read any more. Now we properly unblock that send with a context. --- engine/resources/exec.go | 19 +++++++++++++++---- test/shell/exec-fail.sh | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/engine/resources/exec.go b/engine/resources/exec.go index 4c697a54..58077ff1 100644 --- a/engine/resources/exec.go +++ b/engine/resources/exec.go @@ -20,6 +20,7 @@ package resources import ( "bufio" "bytes" + "context" "fmt" "os/exec" "os/user" @@ -154,7 +155,9 @@ func (obj *ExecRes) Watch() error { return errwrap.Wrapf(err, "error starting Cmd") } - ioChan = obj.bufioChanScanner(scanner) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() // unblock and cleanup + ioChan = obj.bufioChanScanner(ctx, scanner) } obj.init.Running() // when started, notify engine that we're running @@ -536,18 +539,26 @@ type bufioOutput struct { } // bufioChanScanner wraps the scanner output in a channel. -func (obj *ExecRes) bufioChanScanner(scanner *bufio.Scanner) chan *bufioOutput { +func (obj *ExecRes) bufioChanScanner(ctx context.Context, scanner *bufio.Scanner) chan *bufioOutput { ch := make(chan *bufioOutput) obj.wg.Add(1) go func() { defer obj.wg.Done() defer close(ch) for scanner.Scan() { - ch <- &bufioOutput{text: scanner.Text()} // blocks here ? + select { + case ch <- &bufioOutput{text: scanner.Text()}: // blocks here ? + case <-ctx.Done(): + return + } } // on EOF, scanner.Err() will be nil if err := scanner.Err(); err != nil { - ch <- &bufioOutput{err: err} // send any misc errors we encounter + select { + case ch <- &bufioOutput{err: err}: // send any misc errors we encounter + case <-ctx.Done(): + return + } } }() return ch diff --git a/test/shell/exec-fail.sh b/test/shell/exec-fail.sh index 3a2b019b..c4701b44 100755 --- a/test/shell/exec-fail.sh +++ b/test/shell/exec-fail.sh @@ -6,7 +6,7 @@ # TODO: should we return a different exit code if the resources fail? # TODO: should we be converged if one of the resources has permanently failed? $TIMEOUT "$MGMT" run --converged-timeout=15 --no-watch --no-pgp --tmp-prefix yaml --yaml exec-fail.yaml & - +# there's no ^C sent... it should shutdown on its own pid=$! wait $pid # get exit status exit $?