tests: Work around regression in two-host etcd clusters
If you don't give your two-host cluster enough time to "feel healthy", it will generate an error if you perform operations within five seconds. This is a regression, and the five-second threshold is also quite arbitrary. This is detailed at: https://github.com/coreos/etcd/issues/6305. There still seems to be a bit of a race condition, even with a 10s timer, so this commit also disables the StrictReconfigCheck. Re-enable that check as soon as possible.
This commit is contained in:
1
etcd.go
1
etcd.go
@@ -1618,6 +1618,7 @@ func (obj *EmbdEtcd) StartServer(newCluster bool, peerURLsMap etcdtypes.URLsMap)
|
||||
cfg.APUrls = peerURLs
|
||||
cfg.LCUrls = obj.clientURLs
|
||||
cfg.LPUrls = peerURLs
|
||||
cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305
|
||||
|
||||
cfg.InitialCluster = initialPeerURLsMap.String() // including myself!
|
||||
if newCluster {
|
||||
|
||||
@@ -3,20 +3,20 @@
|
||||
# run empty graphs, we're just testing etcd clustering
|
||||
timeout --kill-after=120s 90s ./mgmt run --hostname h1 --tmp-prefix &
|
||||
pid1=$!
|
||||
sleep 5s # let it startup
|
||||
sleep 10s # let it startup
|
||||
|
||||
timeout --kill-after=120s 90s ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix &
|
||||
pid2=$!
|
||||
sleep 5s
|
||||
sleep 10s
|
||||
|
||||
$(sleep 5s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt
|
||||
$(sleep 10s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt
|
||||
wait $pid2
|
||||
e=$?
|
||||
if [ $e -ne 0 ]; then
|
||||
exit $e
|
||||
fi
|
||||
|
||||
$(sleep 5s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt
|
||||
$(sleep 10s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt
|
||||
wait $pid1 # get exit status
|
||||
# if pid1 exits because of a timeout, then it blocked, and this is a bug!
|
||||
exit $?
|
||||
|
||||
Reference in New Issue
Block a user