tests: Work around regression in two-host etcd clusters

If you don't give your two-host cluster enough time to "feel healthy", it will generate an error if you perform operations within five seconds. This is a regression, and the five-second figure is also quite arbitrary. See https://github.com/coreos/etcd/issues/6305 for details. There still seems to be a bit of a race condition, even with a 10s timer, so this commit also disables the StrictReconfigCheck. Re-enable that check as soon as possible.
2016-08-30 04:21:10 -04:00
parent 5e45c5805b
commit 9260066fa3
2 changed files with 5 additions and 4 deletions
--- a/etcd.go
+++ b/etcd.go
@@ -1618,6 +1618,7 @@ func (obj *EmbdEtcd) StartServer(newCluster bool, peerURLsMap etcdtypes.URLsMap)
 	cfg.APUrls = peerURLs
 	cfg.LCUrls = obj.clientURLs
 	cfg.LPUrls = peerURLs
+	cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305

 	cfg.InitialCluster = initialPeerURLsMap.String() // including myself!
 	if newCluster {
--- a/test/shell/t8.sh
+++ b/test/shell/t8.sh
@@ -3,20 +3,20 @@
 # run empty graphs, we're just testing etcd clustering
 timeout --kill-after=120s 90s ./mgmt run --hostname h1 --tmp-prefix &
 pid1=$!
-sleep 5s	# let it startup
+sleep 10s	# let it startup

 timeout --kill-after=120s 90s ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix &
 pid2=$!
-sleep 5s
+sleep 10s

-$(sleep 5s && kill -SIGINT $pid2)&	# send ^C to exit 2nd mgmt
+$(sleep 10s && kill -SIGINT $pid2)&	# send ^C to exit 2nd mgmt
 wait $pid2
 e=$?
 if [ $e -ne 0 ]; then
 	exit $e
 fi

-$(sleep 5s && kill -SIGINT $pid1)&	# send ^C to exit 1st mgmt
+$(sleep 10s && kill -SIGINT $pid1)&	# send ^C to exit 1st mgmt
 wait $pid1	# get exit status
 # if pid1 exits because of a timeout, then it blocked, and this is a bug!
 exit $?