etcd: Add a timeout for etcd server to start correctly

This also updates etcd to a newer version that includes a fix which
makes this startup detection and timeout possible.
This commit is contained in:
James Shubin
2016-08-29 03:57:08 -04:00
parent 9260066fa3
commit 8d3011fb9c
2 changed files with 14 additions and 3 deletions

15
etcd.go
View File

@@ -72,6 +72,7 @@ import (
const ( const (
NS = "_mgmt" // root namespace for mgmt operations NS = "_mgmt" // root namespace for mgmt operations
seedSentinel = "_seed" // you must not name your hostname this seedSentinel = "_seed" // you must not name your hostname this
maxStartServerTimeout = 60 // max number of seconds to wait for server to start
maxStartServerRetries = 3 // number of times to retry starting the etcd server maxStartServerRetries = 3 // number of times to retry starting the etcd server
maxClientConnectRetries = 5 // number of times to retry consecutive connect failures maxClientConnectRetries = 5 // number of times to retry consecutive connect failures
selfRemoveTimeout = 3 // give unnominated members a chance to self exit selfRemoveTimeout = 3 // give unnominated members a chance to self exit
@@ -1629,11 +1630,21 @@ func (obj *EmbdEtcd) StartServer(newCluster bool, peerURLsMap etcdtypes.URLsMap)
//cfg.ForceNewCluster = newCluster // TODO ? //cfg.ForceNewCluster = newCluster // TODO ?
log.Printf("Etcd: StartServer: Starting server...") log.Printf("Etcd: StartServer: Starting server...")
obj.server, err = embed.StartEtcd(cfg) // we hang here if things are bad obj.server, err = embed.StartEtcd(cfg)
log.Printf("Etcd: StartServer: Done starting server!") // it didn't hang!
if err != nil { if err != nil {
return err return err
} }
select {
case <-obj.server.Server.ReadyNotify(): // we hang here if things are bad
log.Printf("Etcd: StartServer: Done starting server!") // it didn't hang!
case <-time.After(time.Duration(maxStartServerTimeout) * time.Second):
e := fmt.Errorf("Etcd: StartServer: Timeout of %d seconds reached!", maxStartServerTimeout)
log.Printf(e.Error())
obj.server.Server.Stop() // trigger a shutdown
obj.serverwg.Add(1) // add for the DestroyServer()
obj.DestroyServer()
return e
}
//log.Fatal(<-obj.server.Err()) XXX //log.Fatal(<-obj.server.Err()) XXX
log.Printf("Etcd: StartServer: Server running...") log.Printf("Etcd: StartServer: Server running...")
obj.memberId = uint64(obj.server.Server.ID()) // store member id for internal use obj.memberId = uint64(obj.server.Server.ID()) // store member id for internal use