tests: Work around regression in two-host etcd clusters

If you don't give a two-host cluster enough time to "feel healthy", it
returns an error when you perform operations within the first five
seconds. This is a regression, and the five-second threshold is also
quite arbitrary. The problem is detailed at:
https://github.com/coreos/etcd/issues/6305

The failure still appears to be a bit of a race condition, even with the
test sleeps raised to 10s, so this commit also disables
StrictReconfigCheck. Re-enable StrictReconfigCheck as soon as possible.
This commit is contained in:
James Shubin
2016-08-30 04:21:10 -04:00
parent 5e45c5805b
commit 9260066fa3
2 changed files with 5 additions and 4 deletions

View File

@@ -1618,6 +1618,7 @@ func (obj *EmbdEtcd) StartServer(newCluster bool, peerURLsMap etcdtypes.URLsMap)
cfg.APUrls = peerURLs cfg.APUrls = peerURLs
cfg.LCUrls = obj.clientURLs cfg.LCUrls = obj.clientURLs
cfg.LPUrls = peerURLs cfg.LPUrls = peerURLs
cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305
cfg.InitialCluster = initialPeerURLsMap.String() // including myself! cfg.InitialCluster = initialPeerURLsMap.String() // including myself!
if newCluster { if newCluster {

View File

@@ -3,20 +3,20 @@
# run empty graphs, we're just testing etcd clustering # run empty graphs, we're just testing etcd clustering
timeout --kill-after=120s 90s ./mgmt run --hostname h1 --tmp-prefix & timeout --kill-after=120s 90s ./mgmt run --hostname h1 --tmp-prefix &
pid1=$! pid1=$!
sleep 5s # let it startup sleep 10s # let it startup
timeout --kill-after=120s 90s ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix & timeout --kill-after=120s 90s ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix &
pid2=$! pid2=$!
sleep 5s sleep 10s
$(sleep 5s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt $(sleep 10s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt
wait $pid2 wait $pid2
e=$? e=$?
if [ $e -ne 0 ]; then if [ $e -ne 0 ]; then
exit $e exit $e
fi fi
$(sleep 5s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt $(sleep 10s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt
wait $pid1 # get exit status wait $pid1 # get exit status
# if pid1 exits because of a timeout, then it blocked, and this is a bug! # if pid1 exits because of a timeout, then it blocked, and this is a bug!
exit $? exit $?