engine: lang: util: Kill race in socketset

After some investigation, it appears that SocketSet.Shutdown() and
SocketSet.Close() are not synchronous operations. The sendto system call
called in SocketSet.Shutdown() is not a blocking send. That means there
is a race in which SocketSet.Shutdown() sends a message to a file
descriptor to unblock select, while SocketSet.Close() will close the
file descriptor that the message is being sent to. If SocketSet.Close()
wins the race, select is listening on a dead file descriptor and will
hang indefinitely.

This is fixed in the current master by putting SocketSet.Close() inside
of the goroutine in which data from the socket is being received. It
relies on SocketSet.Shutdown() being called to terminate the goroutine.
While this works most of the time, there is a race here. All the
goroutines can also be terminated by a closeChan. If the goroutine
receives an event (thus unblocking select) and then closeChan is
triggered, both SocketSet.Shutdown() and SocketSet.Close() race, leading
to undefined behavior.

This patch ensures the ordering of the two function calls by pulling
them both out of the goroutine and separating them with a WaitGroup.

Co-authored-by: James Shubin <james@shubin.ca>
This commit is contained in:
Kevin Kuehler
2019-01-21 14:08:13 -08:00
parent 0c17a0b4f2
commit 5d0efce278
3 changed files with 22 additions and 12 deletions

View File

@@ -191,15 +191,19 @@ func (obj *NetRes) Close() error {
// TODO: currently gets events from ALL interfaces, would be nice to reject // TODO: currently gets events from ALL interfaces, would be nice to reject
// events from other interfaces. // events from other interfaces.
func (obj *NetRes) Watch() error { func (obj *NetRes) Watch() error {
// waitgroup for netlink receive goroutine
wg := &sync.WaitGroup{}
defer wg.Wait()
// create a netlink socket for receiving network interface events // create a netlink socket for receiving network interface events
conn, err := socketset.NewSocketSet(rtmGrps, obj.socketFile, unix.NETLINK_ROUTE) conn, err := socketset.NewSocketSet(rtmGrps, obj.socketFile, unix.NETLINK_ROUTE)
if err != nil { if err != nil {
return errwrap.Wrapf(err, "error creating socket set") return errwrap.Wrapf(err, "error creating socket set")
} }
// waitgroup for netlink receive goroutine
wg := &sync.WaitGroup{}
defer conn.Close()
// We must wait for the Shutdown() AND the select inside of SocketSet to
// complete before we Close, since the unblocking in SocketSet is not a
// synchronous operation.
defer wg.Wait()
defer conn.Shutdown() // close the netlink socket and unblock conn.receive() defer conn.Shutdown() // close the netlink socket and unblock conn.receive()
// watch the systemd-networkd configuration file // watch the systemd-networkd configuration file
@@ -220,7 +224,6 @@ func (obj *NetRes) Watch() error {
wg.Add(1) wg.Add(1)
go func() { go func() {
defer wg.Done() defer wg.Done()
defer conn.Close() // close the pipe when we're done with it
defer close(nlChan) defer close(nlChan)
for { for {
// receive messages from the socket set // receive messages from the socket set

View File

@@ -76,9 +76,12 @@ func (obj CPUCountFact) Stream() error {
// waitgroup for netlink receive goroutine // waitgroup for netlink receive goroutine
wg := &sync.WaitGroup{} wg := &sync.WaitGroup{}
defer wg.Wait()
defer ss.Close() defer ss.Close()
defer ss.Shutdown() // We must wait for the Shutdown() AND the select inside of SocketSet to
// complete before we Close, since the unblocking in SocketSet is not a
// synchronous operation.
defer wg.Wait()
defer ss.Shutdown() // close the netlink socket and unblock conn.receive()
eventChan := make(chan *nlChanEvent) // updated in goroutine when we receive uevent eventChan := make(chan *nlChanEvent) // updated in goroutine when we receive uevent
closeChan := make(chan struct{}) // channel to unblock selects in goroutine closeChan := make(chan struct{}) // channel to unblock selects in goroutine

View File

@@ -108,18 +108,22 @@ func TestNfd(t *testing.T) {
// test SocketSet.Shutdown() // test SocketSet.Shutdown()
func TestShutdown(t *testing.T) { func TestShutdown(t *testing.T) {
wg := &sync.WaitGroup{}
defer wg.Wait()
// pass 0 so we create a socket that doesn't receive any events // pass 0 so we create a socket that doesn't receive any events
ss, err := NewSocketSet(0, "pipe.sock", 0) ss, err := NewSocketSet(0, "pipe.sock", 0)
if err != nil { if err != nil {
t.Errorf("could not create SocketSet: %+v", err) t.Errorf("could not create SocketSet: %+v", err)
} }
// waitgroup for netlink receive goroutine
wg := &sync.WaitGroup{}
defer ss.Close()
// We must wait for the Shutdown() AND the select inside of SocketSet to
// complete before we Close, since the unblocking in SocketSet is not a
// synchronous operation.
defer wg.Wait()
defer ss.Shutdown() // close the netlink socket and unblock conn.receive()
closeChan := make(chan struct{}) closeChan := make(chan struct{})
defer close(closeChan) defer close(closeChan)
defer ss.Close()
defer ss.Shutdown()
// create a listener that never receives any data // create a listener that never receives any data
wg.Add(1) // add a waitgroup to ensure this will block if we don't properly unblock Select wg.Add(1) // add a waitgroup to ensure this will block if we don't properly unblock Select