The MemberList() function, which apparently looks at all of the endpoints, was blocking right after a member exited because we still had a stale list of endpoints. While a member that exits safely does update the endpoint list, the other member would (unavoidably) race to receive that message, and so it might call MemberList() with the now-stale endpoint list. With this change, we immediately invalidate an endpoint that we know to be gone. This also adds a simple test case to catch this scenario.
23 lines
626 B
Bash
Executable File
23 lines
626 B
Bash
Executable File
#!/bin/bash
# Clustering regression test: start two mgmt instances (h1, then h2 seeded
# from h1), interrupt h2 and then h1, and report whether both shut down
# cleanly. Each instance runs under `timeout`, so an instance that blocks on
# shutdown is eventually killed and shows up here as a non-zero exit status.
#
# Exit status: 0 if both instances exited cleanly, otherwise the status of
# the first instance that failed.

# Set here rather than in the shebang so that it also applies when the script
# is invoked as `bash <script>`.
set -e

# run empty graphs, we're just testing etcd clustering
timeout --kill-after=120s 90s ./mgmt run --hostname h1 &
pid1=$!
sleep 5s # let it startup

timeout --kill-after=120s 90s ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 &
pid2=$!
sleep 5s

# Use a plain background subshell; wrapping this in $(...) would try to
# execute whatever the inner commands print.
(sleep 5s && kill -SIGINT "$pid2") & # send ^C to exit 2nd mgmt

# Capture the status with `||` so that `set -e` does not abort the script
# before we get a chance to look at it.
e=0
wait "$pid2" || e=$?
if [ "$e" -ne 0 ]; then
	# Best effort: don't leave the 1st mgmt running in the background; it may
	# already be gone, so a failure to signal it is deliberately ignored.
	kill -SIGINT "$pid1" 2>/dev/null || true
	exit "$e"
fi

(sleep 5s && kill -SIGINT "$pid1") & # send ^C to exit 1st mgmt

e=0
wait "$pid1" || e=$? # get exit status
# if pid1 exits because of a timeout, then it blocked, and this is a bug!
exit "$e"