From bc390088b3fe057b31109aeda6fad2176c61bcb4 Mon Sep 17 00:00:00 2001 From: James Shubin Date: Wed, 30 Aug 2023 01:15:29 -0400 Subject: [PATCH] etcd: Don't unset our only endpoint When mgmt is in etcd-client-only mode and using an external etcd server, we don't want to unset our only known endpoint since this would deadlock our etcd client since it can't connect to anyone. This could have happened because a plain etcd server didn't set any endpoints to follow, and as a result we noticed it was empty and decided to use that instead. To workaround this issue on an earlier version of mgmt, you would have had to run: etcdctl put /_mgmt/endpoints/etcd http://localhost:2379 to set this magic key on the initial etcd server. --- etcd/callback.go | 6 +++++- etcd/etcd.go | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/etcd/callback.go b/etcd/callback.go index a0e6609a..4ee4a458 100644 --- a/etcd/callback.go +++ b/etcd/callback.go @@ -82,11 +82,15 @@ func (obj *EmbdEtcd) endpointApply(data *interfaces.WatcherData) error { } // is the endpoint list different? - if err := cmpURLsMap(obj.endpoints, endpoints); err != nil { + // TODO: do we want to use the skipEndpointApply here too? + skipEndpointApply := obj.NoServer && len(endpoints) == 0 && len(obj.endpoints) > 0 + if err := cmpURLsMap(obj.endpoints, endpoints); err != nil && !skipEndpointApply { obj.endpoints = endpoints // set // can happen if a server drops out for example obj.Logf("endpoint list changed to: %+v", endpoints) obj.setEndpoints() + } else if err != nil && skipEndpointApply { + obj.Logf("skipping endpoints apply") } return nil } diff --git a/etcd/etcd.go b/etcd/etcd.go index 87ca9b25..365818fc 100644 --- a/etcd/etcd.go +++ b/etcd/etcd.go @@ -1283,6 +1283,26 @@ func (obj *EmbdEtcd) runEndpoints(ctx context.Context) error { obj.err(errwrap.Wrapf(err, "get endpoints errored")) continue } + + // If we're using an external etcd server, then + // we would not initially see any endpoints, and + // we'd erase our knowledge of our single + // endpoint, which would deadlock our etcd + // client. Instead, skip updates that take our + // list down to zero endpoints. We maintain this + // watch in case someone (an mgmt resource that + // is managing the etcd cluster for example?) + // wants to set these values for all of the mgmt + // clients connected to the etcd server pool. + if obj.NoServer && len(endpoints) == 0 && len(obj.endpoints) > 0 { + if obj.Debug { + obj.Logf("had %d endpoints: %+v", len(obj.endpoints), obj.endpoints) + obj.Logf("got %d endpoints: %+v", len(endpoints), endpoints) + } + obj.Logf("skipping endpoints update") + continue + } + obj.endpoints = endpoints obj.setEndpoints()