From 9260066fa302395c9f163391f376074e2b872830 Mon Sep 17 00:00:00 2001 From: James Shubin Date: Tue, 30 Aug 2016 04:21:10 -0400 Subject: [PATCH] tests: Workaround regression in two host etcd clusters If you don't give your two host cluster enough time to "feel healthy", it will generate an error if you do operations within five seconds. This is a regression and the five seconds is also quite arbitrary. This is detailed at: https://github.com/coreos/etcd/issues/6305 This seems to be a bit of a race condition, even with a 10s timer, so this also disables the StrictReconfigCheck. Re-enable this as soon as possible. --- etcd.go | 1 + test/shell/t8.sh | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/etcd.go b/etcd.go index c08de232..c8fce194 100644 --- a/etcd.go +++ b/etcd.go @@ -1618,6 +1618,7 @@ func (obj *EmbdEtcd) StartServer(newCluster bool, peerURLsMap etcdtypes.URLsMap) cfg.APUrls = peerURLs cfg.LCUrls = obj.clientURLs cfg.LPUrls = peerURLs + cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305 cfg.InitialCluster = initialPeerURLsMap.String() // including myself! if newCluster { diff --git a/test/shell/t8.sh b/test/shell/t8.sh index 6db30e3b..d159fb72 100755 --- a/test/shell/t8.sh +++ b/test/shell/t8.sh @@ -3,20 +3,20 @@ # run empty graphs, we're just testing etcd clustering timeout --kill-after=120s 90s ./mgmt run --hostname h1 --tmp-prefix & pid1=$! -sleep 5s # let it startup +sleep 10s # let it startup timeout --kill-after=120s 90s ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix & pid2=$! -sleep 5s +sleep 10s -$(sleep 5s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt +$(sleep 10s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt wait $pid2 e=$? if [ $e -ne 0 ]; then exit $e fi -$(sleep 5s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt +$(sleep 10s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt wait $pid1 # get exit status # if pid1 exits because of a timeout, then it blocked, and this is a bug! exit $?