diff --git a/docs/faq.md b/docs/faq.md
index 3f771ae9..deda3840 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -215,23 +215,25 @@ requires a number of seconds as an argument.
 ./mgmt run lang --lang examples/lang/hello0.mcl --converged-timeout=5
 ```
 
-### What does the error message about an inconsistent dataDir mean?
+### On startup, `mgmt` hangs after: `etcd: server: starting...`.
 
 If you get an error message similar to:
 
 ```
-Etcd: Connect: CtxError...
-Etcd: CtxError: Reason: CtxDelayErr(5s): No endpoints available yet!
-Etcd: Connect: Endpoints: []
-Etcd: The dataDir (/var/lib/mgmt/etcd) might be inconsistent or corrupt.
+etcd: server: starting...
+etcd: server: start timeout of 1m0s reached
+etcd: server: close timeout of 15s reached
 ```
 
-This happens when there are a series of fatal connect errors in a row. This can
-happen when you start `mgmt` using a dataDir that doesn't correspond to the
-current cluster view. As a result, the embedded etcd server never finishes
-starting up, and as a result, a default endpoint never gets added. The solution
-is to either reconcile the mistake, and if there is no important data saved, you
-can remove the etcd dataDir. This is typically `/var/lib/mgmt/etcd/member/`.
+and nothing happens afterwards, then this can be due to a corrupt etcd storage
+directory. Each etcd server embedded in mgmt needs a dedicated directory where
+it stores its local state. This directory must never be shared by more than one
+member, and is typically `/var/lib/mgmt/etcd/member/`. If you accidentally
+re-use it (for example during testing) with a different cluster view, such as
+with more than one different hostname, then you can corrupt it.
+
+The solution is to avoid making this mistake in the first place. If there is no
+important data saved, you can remove the etcd member dir and start over.
 
 ### On running `make` to build a new version, it errors with: `Text file busy`.
 
diff --git a/engine/graph/engine.go b/engine/graph/engine.go
index 11738792..c37a1b31 100644
--- a/engine/graph/engine.go
+++ b/engine/graph/engine.go
@@ -62,6 +62,13 @@ type Engine struct {
 // If the struct does not validate, or it cannot initialize, then this errors.
 // Initially it will contain an empty graph.
 func (obj *Engine) Init() error {
+	if obj.Program == "" {
+		return fmt.Errorf("the Program is empty")
+	}
+	if obj.Hostname == "" {
+		return fmt.Errorf("the Hostname is empty")
+	}
+
 	var err error
 	if obj.graph, err = pgraph.NewGraph("graph"); err != nil {
 		return err
diff --git a/engine/resources/config_etcd.go b/engine/resources/config_etcd.go
new file mode 100644
index 00000000..77c6dfe7
--- /dev/null
+++ b/engine/resources/config_etcd.go
@@ -0,0 +1,250 @@
+// Mgmt
+// Copyright (C) 2013-2019+ James Shubin and the project contributors
+// Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
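+
+// An example usage of this resource from mcl might look something like the
+// following sketch. The param names come from the lang struct tags below, but
+// the exact surrounding mcl syntax shown here is illustrative only:
+//
+//	config:etcd "etcd" {
+//		idealclustersize => 5,
+//		allow_size_shutdown => false,
+//	}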
+
+package resources
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/purpleidea/mgmt/engine"
+	"github.com/purpleidea/mgmt/engine/traits"
+	"github.com/purpleidea/mgmt/util"
+	"github.com/purpleidea/mgmt/util/errwrap"
+)
+
+func init() {
+	engine.RegisterResource("config:etcd", func() engine.Res { return &ConfigEtcdRes{} })
+}
+
+const (
+	sizeCheckApplyTimeout = 5 * time.Second
+)
+
+// ConfigEtcdRes is a resource that sets mgmt's etcd configuration.
+type ConfigEtcdRes struct {
+	traits.Base // add the base methods without re-implementation
+
+	init *engine.Init
+
+	// IdealClusterSize is the requested minimum size of the cluster. If you
+	// set this to zero, it will cause a cluster wide shutdown if
+	// AllowSizeShutdown is true. If it's not true, then it will cause a
+	// validation error.
+	IdealClusterSize uint16 `lang:"idealclustersize"`
+	// AllowSizeShutdown is a required safety flag that you must set to true
+	// if you want to allow causing a cluster shutdown by setting
+	// IdealClusterSize to zero.
+	AllowSizeShutdown bool `lang:"allow_size_shutdown"`
+
+	// sizeFlag determines whether sizeCheckApply already ran or not.
+	sizeFlag bool
+
+	interruptChan chan struct{}
+	wg            *sync.WaitGroup
+}
+
+// Default returns some sensible defaults for this resource.
+func (obj *ConfigEtcdRes) Default() engine.Res {
+	return &ConfigEtcdRes{}
+}
+
+// Validate if the params passed in are valid data.
+func (obj *ConfigEtcdRes) Validate() error {
+	// NOTE: IdealClusterSize is a uint16, so it can never be negative.
+	if obj.IdealClusterSize == 0 && !obj.AllowSizeShutdown {
+		return fmt.Errorf("the IdealClusterSize can't be zero if AllowSizeShutdown is false")
+	}
+
+	return nil
+}
+
+// Init runs some startup code for this resource.
+func (obj *ConfigEtcdRes) Init(init *engine.Init) error {
+	obj.init = init // save for later
+
+	obj.interruptChan = make(chan struct{})
+	obj.wg = &sync.WaitGroup{}
+
+	return nil
+}
+
+// Close is run by the engine to clean up after the resource is done.
+func (obj *ConfigEtcdRes) Close() error {
+	obj.wg.Wait() // bonus
+	return nil
+}
+
+// Watch is the primary listener for this resource and it outputs events.
+func (obj *ConfigEtcdRes) Watch() error {
+	obj.wg.Add(1)
+	defer obj.wg.Done()
+	// FIXME: add timeout to context
+	// The obj.init.Done channel is closed by the engine to signal shutdown.
+	ctx, cancel := util.ContextWithCloser(context.Background(), obj.init.Done)
+	defer cancel()
+	ch, err := obj.init.World.IdealClusterSizeWatch(util.CtxWithWg(ctx, obj.wg))
+	if err != nil {
+		return errwrap.Wrapf(err, "could not watch ideal cluster size")
+	}
+
+	obj.init.Running() // when started, notify engine that we're running
+
+Loop:
+	for {
+		select {
+		case event, ok := <-ch:
+			if !ok {
+				break Loop
+			}
+			if obj.init.Debug {
+				obj.init.Logf("event: %+v", event)
+			}
+			// pass through and send an event
+
+		case <-obj.init.Done: // closed by the engine to signal shutdown
+			return nil
+		}
+
+		obj.init.Event() // notify engine of an event (this can block)
+	}
+
+	return nil
+}
+
+// sizeCheckApply sets the IdealClusterSize parameter. If it sees a value change
+// to zero, then it *won't* try and change it away from zero, because it assumes
+// that someone has requested a shutdown. If a zero value is seen on first
+// startup, then it will change it, because it might be left over from the
+// previous cluster.
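+// For example (an illustrative timeline, not taken from a real run): if the
+// param is five and we read back zero on the very first check, we overwrite it
+// with five, but if we read back zero on a *later* check, we leave it alone and
+// report converged, since a zero that appears after startup means that a
+// cluster shutdown was requested.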
+func (obj *ConfigEtcdRes) sizeCheckApply(apply bool) (bool, error) {
+	wg := &sync.WaitGroup{}
+	defer wg.Wait() // this must be above the defer cancel() call
+	ctx, cancel := context.WithTimeout(context.Background(), sizeCheckApplyTimeout)
+	defer cancel()
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		select {
+		case <-obj.interruptChan:
+			cancel()
+		case <-ctx.Done():
+			// let this exit
+		}
+	}()
+
+	val, err := obj.init.World.IdealClusterSizeGet(ctx)
+	if err != nil {
+		return false, errwrap.Wrapf(err, "could not get ideal cluster size")
+	}
+
+	// if we got a value of zero, and we've already run before, then it's ok
+	if obj.IdealClusterSize != 0 && val == 0 && obj.sizeFlag {
+		obj.init.Logf("impending cluster shutdown, not setting ideal cluster size")
+		return true, nil // impending shutdown, don't try and cancel it.
+	}
+	obj.sizeFlag = true
+
+	// must be done after setting the above flag
+	if obj.IdealClusterSize == val { // state is correct
+		return true, nil
+	}
+
+	if !apply {
+		return false, nil
+	}
+
+	// set!
+	// This is run as a transaction so we detect if we needed to change it.
+	changed, err := obj.init.World.IdealClusterSizeSet(ctx, obj.IdealClusterSize)
+	if err != nil {
+		return false, errwrap.Wrapf(err, "could not set ideal cluster size")
+	}
+	if !changed {
+		return true, nil // we lost a race, which means no change needed
+	}
+	obj.init.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
+
+	return false, nil
+}
+
+// CheckApply method for this etcd config resource. It runs the individual
+// check and apply steps for each of the settings that this resource manages.
+func (obj *ConfigEtcdRes) CheckApply(apply bool) (bool, error) {
+	checkOK := true
+
+	if c, err := obj.sizeCheckApply(apply); err != nil {
+		return false, err
+	} else if !c {
+		checkOK = false
+	}
+
+	// TODO: add more config settings management here...
+	//if c, err := obj.TODOCheckApply(apply); err != nil {
+	//	return false, err
+	//} else if !c {
+	//	checkOK = false
+	//}
+
+	return checkOK, nil // w00t
+}
+
+// Cmp compares two resources and returns an error if they are not equivalent.
+func (obj *ConfigEtcdRes) Cmp(r engine.Res) error {
+	// we can only compare ConfigEtcdRes to others of the same resource kind
+	res, ok := r.(*ConfigEtcdRes)
+	if !ok {
+		return fmt.Errorf("not a %s", obj.Kind())
+	}
+
+	if obj.IdealClusterSize != res.IdealClusterSize {
+		return fmt.Errorf("the IdealClusterSize param differs")
+	}
+	if obj.AllowSizeShutdown != res.AllowSizeShutdown {
+		return fmt.Errorf("the AllowSizeShutdown param differs")
+	}
+
+	return nil
+}
+
+// Interrupt is called to ask the execution of this resource to end early.
+func (obj *ConfigEtcdRes) Interrupt() error {
+	close(obj.interruptChan)
+	return nil
+}
+
+// UnmarshalYAML is the custom unmarshal handler for this struct.
+// It is primarily useful for setting the defaults.
+func (obj *ConfigEtcdRes) UnmarshalYAML(unmarshal func(interface{}) error) error {
+	type rawRes ConfigEtcdRes // indirection to avoid infinite recursion
+
+	def := obj.Default()            // get the default
+	res, ok := def.(*ConfigEtcdRes) // put in the right format
+	if !ok {
+		return fmt.Errorf("could not convert to ConfigEtcdRes")
+	}
+	raw := rawRes(*res) // convert; the defaults go here
+
+	if err := unmarshal(&raw); err != nil {
+		return err
+	}
+
+	*obj = ConfigEtcdRes(raw) // restore from indirection with type conversion!
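+	// obj now contains the defaults overlaid with any explicitly set fields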
+ return nil +} diff --git a/engine/resources/kv.go b/engine/resources/kv.go index 698d69ac..9926194f 100644 --- a/engine/resources/kv.go +++ b/engine/resources/kv.go @@ -18,11 +18,15 @@ package resources import ( + "context" "fmt" "strconv" + "sync" + "time" "github.com/purpleidea/mgmt/engine" "github.com/purpleidea/mgmt/engine/traits" + "github.com/purpleidea/mgmt/util" "github.com/purpleidea/mgmt/util/errwrap" ) @@ -39,6 +43,10 @@ const ( SkipCmpStyleString ) +const ( + kvCheckApplyTimeout = 5 * time.Second +) + // KVRes is a resource which writes a key/value pair into cluster wide storage. // It will ensure that the key is set to the requested value. The one exception // is that if you use the SkipLessThan parameter, then it will only replace the @@ -67,6 +75,8 @@ type KVRes struct { // the value is greater when using the SkipLessThan parameter. SkipCmpStyle KVResSkipCmpStyle `lang:"skipcmpstyle" yaml:"skipcmpstyle"` + interruptChan chan struct{} + // TODO: does it make sense to have different backends here? (eg: local) } @@ -107,6 +117,8 @@ func (obj *KVRes) Validate() error { func (obj *KVRes) Init(init *engine.Init) error { obj.init = init // save for later + obj.interruptChan = make(chan struct{}) + return nil } @@ -117,9 +129,17 @@ func (obj *KVRes) Close() error { // Watch is the primary listener for this resource and it outputs events. func (obj *KVRes) Watch() error { - obj.init.Running() // when started, notify engine that we're running + // FIXME: add timeout to context + // The obj.init.Done channel is closed by the engine to signal shutdown. + ctx, cancel := util.ContextWithCloser(context.Background(), obj.init.Done) + defer cancel() - ch := obj.init.World.StrMapWatch(obj.getKey()) // get possible events! + ch, err := obj.init.World.StrMapWatch(ctx, obj.getKey()) // get possible events! + if err != nil { + return err + } + + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -191,13 +211,28 @@ func (obj *KVRes) lessThanCheck(value string) (bool, error) { func (obj *KVRes) CheckApply(apply bool) (bool, error) { obj.init.Logf("CheckApply(%t)", apply) + wg := &sync.WaitGroup{} + defer wg.Wait() // this must be above the defer cancel() call + ctx, cancel := context.WithTimeout(context.Background(), kvCheckApplyTimeout) + defer cancel() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.interruptChan: + cancel() + case <-ctx.Done(): + // let this exit + } + }() + if val, exists := obj.init.Recv()["Value"]; exists && val.Changed { // if we received on Value, and it changed, wooo, nothing to do. obj.init.Logf("CheckApply: `Value` was updated!") } hostname := obj.init.Hostname // me - keyMap, err := obj.init.World.StrMapGet(obj.getKey()) + keyMap, err := obj.init.World.StrMapGet(ctx, obj.getKey()) if err != nil { return false, errwrap.Wrapf(err, "check error during StrGet") } @@ -217,7 +252,7 @@ func (obj *KVRes) CheckApply(apply bool) (bool, error) { return true, nil // nothing to delete, we're good! 
} else if ok && obj.Value == nil { // delete - err := obj.init.World.StrMapDel(obj.getKey()) + err := obj.init.World.StrMapDel(ctx, obj.getKey()) return false, errwrap.Wrapf(err, "apply error during StrDel") } @@ -225,7 +260,7 @@ func (obj *KVRes) CheckApply(apply bool) (bool, error) { return false, nil } - if err := obj.init.World.StrMapSet(obj.getKey(), *obj.Value); err != nil { + if err := obj.init.World.StrMapSet(ctx, obj.getKey(), *obj.Value); err != nil { return false, errwrap.Wrapf(err, "apply error during StrSet") } @@ -261,6 +296,12 @@ func (obj *KVRes) Cmp(r engine.Res) error { return nil } +// Interrupt is called to ask the execution of this resource to end early. +func (obj *KVRes) Interrupt() error { + close(obj.interruptChan) + return nil +} + // KVUID is the UID struct for KVRes. type KVUID struct { engine.BaseUID diff --git a/engine/world.go b/engine/world.go index 50fb1431..654c3044 100644 --- a/engine/world.go +++ b/engine/world.go @@ -18,6 +18,8 @@ package engine import ( + "context" + "github.com/purpleidea/mgmt/etcd/scheduler" ) @@ -25,22 +27,26 @@ import ( // the GAPI to store state and exchange information throughout the cluster. It // is the interface each machine uses to communicate with the rest of the world. type World interface { // TODO: is there a better name for this interface? - ResWatch() chan error - ResExport([]Res) error + ResWatch(context.Context) (chan error, error) + ResExport(context.Context, []Res) error // FIXME: should this method take a "filter" data struct instead of many args? - ResCollect(hostnameFilter, kindFilter []string) ([]Res, error) + ResCollect(ctx context.Context, hostnameFilter, kindFilter []string) ([]Res, error) - StrWatch(namespace string) chan error + IdealClusterSizeWatch(context.Context) (chan error, error) + IdealClusterSizeGet(context.Context) (uint16, error) + IdealClusterSizeSet(context.Context, uint16) (bool, error) + + StrWatch(ctx context.Context, namespace string) (chan error, error) StrIsNotExist(error) bool - StrGet(namespace string) (string, error) - StrSet(namespace, value string) error - StrDel(namespace string) error + StrGet(ctx context.Context, namespace string) (string, error) + StrSet(ctx context.Context, namespace, value string) error + StrDel(ctx context.Context, namespace string) error // XXX: add the exchange primitives in here directly? - StrMapWatch(namespace string) chan error - StrMapGet(namespace string) (map[string]string, error) - StrMapSet(namespace, value string) error - StrMapDel(namespace string) error + StrMapWatch(ctx context.Context, namespace string) (chan error, error) + StrMapGet(ctx context.Context, namespace string) (map[string]string, error) + StrMapSet(ctx context.Context, namespace, value string) error + StrMapDel(ctx context.Context, namespace string) error Scheduler(namespace string, opts ...scheduler.Option) (*scheduler.Result, error) diff --git a/etcd/callback.go b/etcd/callback.go new file mode 100644 index 00000000..2559772f --- /dev/null +++ b/etcd/callback.go @@ -0,0 +1,497 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "context" + "fmt" + "sync" + + "github.com/purpleidea/mgmt/etcd/interfaces" + "github.com/purpleidea/mgmt/util" + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" // "clientv3" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" +) + +// nominateApply applies the changed watcher data onto our local caches. +func (obj *EmbdEtcd) nominateApply(data *interfaces.WatcherData) error { + if data == nil { // ignore empty data + return nil + } + + // If we tried to lookup the nominated members here (in etcd v3) this + // would sometimes block because we would lose the cluster leader once + // the current leader calls the MemberAdd API and it steps down trying + // to form a two host cluster. Instead, we can look at the event + // response data to read the nominated values! Since we only see what + // has *changed* in the response data, we have to keep track of the + // original state and apply the deltas. This must be idempotent in case + // it errors and is called again. If we're retrying and we get a data + // format error, it's probably not the end of the world. + nominated, err := applyDeltaEvents(data, obj.nominated) // map[hostname]URLs (URLsMap) + if err != nil && err != errInconsistentApply { // allow missing deletes + return err // unexpected error, fail + } + // TODO: do we want to sort this if it becomes a list instead of a map? + //sort.Strings(nominated) // deterministic order + obj.nominated = nominated + return nil +} + +// volunteerApply applies the changed watcher data onto our local caches. +func (obj *EmbdEtcd) volunteerApply(data *interfaces.WatcherData) error { + if data == nil { // ignore empty data + return nil + } + volunteers, err := applyDeltaEvents(data, obj.volunteers) // map[hostname]URLs (URLsMap) + if err != nil && err != errInconsistentApply { // allow missing deletes + return err // unexpected error, fail + } + // TODO: do we want to sort this if it becomes a list instead of a map? + //sort.Strings(volunteers) // deterministic order + obj.volunteers = volunteers + return nil +} + +// endpointApply applies the changed watcher data onto our local caches. In this +// particular apply function, it also sets our client with the new endpoints. +func (obj *EmbdEtcd) endpointApply(data *interfaces.WatcherData) error { + if data == nil { // ignore empty data + return nil + } + endpoints, err := applyDeltaEvents(data, obj.endpoints) // map[hostname]URLs (URLsMap) + if err != nil && err != errInconsistentApply { // allow missing deletes + return err // unexpected error, fail + } + + // is the endpoint list different? + if err := cmpURLsMap(obj.endpoints, endpoints); err != nil { + obj.endpoints = endpoints // set + // can happen if a server drops out for example + obj.Logf("endpoint list changed to: %+v", endpoints) + obj.setEndpoints() + } + return nil +} + +// nominateCb runs to respond to the nomination list change events. +// Functionally, it controls the starting and stopping of the server process. 
If +// a nominate message is received for this machine, then it means it is already +// being added to the cluster with member add and the cluster is now waiting for +// it to start up. When a nominate entry is removed, it's up to this function to +// run the member remove right before it shuts its server down. +func (obj *EmbdEtcd) nominateCb(ctx context.Context) error { + // Ensure that only one copy of this function is run simultaneously. + // This is because we don't want to cause runServer to race with + // destroyServer. Let us completely start up before we can cancel it. As + // a special case, destroyServer itself can race against itself. I don't + // think it's possible for contention on this mutex, but we'll leave it + // in for safety. + obj.nominatedMutex.Lock() + defer obj.nominatedMutex.Unlock() + // This ordering mutex is being added for safety, since there is no good + // reason for this function and volunteerCb to run simultaneously, and + // it might be preventing a race condition that was happening. + obj.orderingMutex.Lock() + defer obj.orderingMutex.Unlock() + if obj.Debug { + obj.Logf("nominateCb") + defer obj.Logf("nominateCb: done!") + } + + // check if i have actually volunteered first of all... + if obj.NoServer || len(obj.ServerURLs) == 0 { + obj.Logf("inappropriately nominated, rogue or stale server?") + // TODO: should we un-nominate ourself? + return nil // we've done our job successfully + } + + // This can happen when we're shutting down, build the nominated value. + if len(obj.nominated) == 0 { + obj.Logf("list of nominations is empty") + //return nil // don't exit, we might want to shutdown the server + } else { + obj.Logf("nominated: %v", obj.nominated) + } + + // if there are no other peers, we create a new server + // TODO: do we need an || len(obj.nominated) == 0 if we're the first? + _, exists := obj.nominated[obj.Hostname] // am i nominated? + newCluster := len(obj.nominated) == 1 && exists + if obj.Debug { + obj.Logf("nominateCb: newCluster: %t; exists: %t; obj.server == nil: %t", newCluster, exists, obj.server == nil) + } + + // TODO: server start retries should be handled inside of runServer... + if obj.serverAction(serverActionStart) { // start + // no server is running, but it should be + wg := &sync.WaitGroup{} + serverReady, ackReady := obj.ServerReady() // must call ack! + serverExited, ackExited := obj.ServerExited() // must call ack! 
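+		// NOTE: both ack callbacks below must each be called exactly
+		// once, no matter which of the two select branches fires first.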
+ + var sendError = false + var serverErr error + obj.Logf("waiting for server...") + nominated, err := copyURLsMap(obj.nominated) + if err != nil { + return err + } + wg.Add(1) + go func() { + defer wg.Done() + obj.errExitN = make(chan struct{}) + defer close(obj.errExitN) // multi-signal for errChan close op + // blocks until server exits + serverErr = obj.runServer(newCluster, nominated) + // in case this exits on its own instead of with destroy + defer obj.destroyServer() // run to reset some values + if sendError && serverErr != nil { // exited with an error + select { + case obj.errChan <- errwrap.Wrapf(serverErr, "runServer errored"): + } + } + }() + + // block until either server is ready or an early exit occurs + select { + case <-serverReady: + // detach from our local return of errors from an early + // server exit (pre server ready) and switch to channel + sendError = true // gets set before the ackReady() does + ackReady() // must be called + ackExited() // must be called + // pass + + case <-serverExited: + ackExited() // must be called + ackReady() // must be called + + wg.Wait() // wait for server to finish to get early err + return serverErr + } + + // Once the server is online, we *must* publish this information + // so that (1) others know where to connect to us (2) we provide + // an "event" for member add since there is not any event that's + // currently built-in to etcd and (3) so we have a key to expire + // when we shutdown or crash to give us the member remove event. + // please see issue: https://github.com/coreos/etcd/issues/5277 + + } else if obj.serverAction(serverActionStop) { // stop? + // server is running, but it should not be + + // i have been un-nominated, remove self and shutdown server! + // we don't need to do a member remove if i'm the last one... + if len(obj.nominated) != 0 { // don't call if nobody left but me! + // work around: https://github.com/coreos/etcd/issues/5482 + // and it might make sense to avoid it if we're the last + obj.Logf("member remove: removing self: %d", obj.serverID) + resp, err := obj.memberRemove(ctx, obj.serverID) + if err != nil { + if obj.Debug { + obj.Logf("error with member remove: %v", err) + } + return errwrap.Wrapf(err, "member remove error") + } + if resp != nil { + obj.Logf("member removed (self): %s (%d)", obj.Hostname, obj.serverID) + if err := obj.updateMemberState(resp.Members); err != nil { + return err + } + } + } + + // FIXME: if we fail on destroy should we try to run some of the + // other cleanup tasks that usually afterwards (below) anyways ? + if err := obj.destroyServer(); err != nil { // sync until exited + return errwrap.Wrapf(err, "destroyServer errored") + } + + // We close with this special sentinel only during destroy/exit. + if obj.closing { + return interfaces.ErrShutdown + } + } + + return nil +} + +// volunteerCb runs to respond to the volunteer list change events. +// Functionally, it controls the nominating and adding of members. It typically +// nominates a peer so that it knows it will get to be a server, which causes it +// to start up its server. It also runs the member add operation so that the +// cluster gets quorum safely. The member remove operation is typically run in +// the nominateCb of that server when it is asked to shutdown. This occurs when +// the nominate entry for that server is removed. If a server removes its +// volunteer entry we must respond by removing the nomination so that it can +// receive that message and shutdown. 
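+// In short, the complete lifecycle is: volunteer, then (on the leader)
+// nominate and member add, then server startup; and for shutdown: unvolunteer,
+// then (on the leader) unnominate, then member remove, run by the member
+// itself right before it stops its own server.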
+// FIXME: we might need to respond to member change/disconnect/shutdown events,
+// see: https://github.com/coreos/etcd/issues/5277
+// XXX: Don't allow this function to partially run if it is canceled part way
+// through... We don't want an inconsistent state where we did unnominate, but
+// didn't remove a member...
+// XXX: If the leader changes, do we need to kick the volunteerCb or anything
+// else that might have required a leader and which returned because it did not
+// have one, thus losing an event?
+func (obj *EmbdEtcd) volunteerCb(ctx context.Context) error {
+	// Ensure that only one copy of this function is run simultaneously.
+	// It's not entirely clear if this can ever happen or if it's needed,
+	// but it's an inexpensive safety check that we can add in for now.
+	obj.volunteerMutex.Lock()
+	defer obj.volunteerMutex.Unlock()
+	// This ordering mutex is being added for safety, since there is no good
+	// reason for this function and nominateCb to run simultaneously, and it
+	// might be preventing a race condition that was happening.
+	obj.orderingMutex.Lock()
+	defer obj.orderingMutex.Unlock()
+	if obj.Debug {
+		obj.Logf("volunteerCb")
+		defer obj.Logf("volunteerCb: done!")
+	}
+
+	// FIXME: are there any situations where we don't want to short circuit
+	// here, such as if i'm the last node?
+	if obj.server == nil {
+		if obj.Debug {
+			obj.Logf("i'm not a server yet...")
+		}
+		return nil // if i'm not a server, i'm not a leader, return
+	}
+
+	// FIXME: Instead of checking this, assume yes, and use the
+	// `WithRequireLeader` wrapper, and just ignore the error from that if
+	// it's wrong... Combined with events that poke this volunteerCb when
+	// the leader changes, we shouldn't miss any events...
+	if isLeader, err := obj.isLeader(ctx); err != nil { // XXX: race!
+		return errwrap.Wrapf(err, "error determining leader")
+	} else if !isLeader {
+		if obj.Debug {
+			obj.Logf("we are not the leader...")
+		}
+		return nil
+	}
+	// i am the leader!
+
+	// Remember that the member* operations return the membership, so this
+	// means we don't need to run an extra memberList in those scenarios...
+	// However, this can get out of sync easily, so ensure that our member
+	// information is very recent.
+	if err := obj.memberStateFromList(ctx); err != nil {
+		return errwrap.Wrapf(err, "error during state sync")
+	}
+	// XXX: If we have any unstarted members here, do we want to reschedule
+	// this volunteerCb in a moment? Or will we get another event anyways?
+
+	// NOTE: There used to be an is_leader check right here...
+	// FIXME: Should we use WithRequireLeader instead? Here? Elsewhere?
+	// https://godoc.org/github.com/coreos/etcd/clientv3#WithRequireLeader
+
+	// FIXME: can this happen, and if so, is it an error or a pass-through?
+	if len(obj.volunteers) == 0 {
+		obj.Logf("list of volunteers is empty")
+		//return fmt.Errorf("volunteer list is empty")
+	} else {
+		obj.Logf("volunteers: %+v", obj.volunteers)
+	}
+
+	// TODO: do we really need to check these errors?
+	m, err := copyURLsMap(obj.membermap) // list of members...
+	if err != nil {
+		return err
+	}
+	v, err := copyURLsMap(obj.volunteers)
+	if err != nil {
+		return err
+	}
+	// Unnominate anyone that unvolunteers, so they can shutdown cleanly...
+	// FIXME: one step at a time... do we trigger subsequent steps somehow?
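+	// The chooser sees the current members and the current volunteers, and
+	// it decides who we should nominate and who we should unnominate next.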
+	obj.Logf("chooser: (%+v)/(%+v)", m, v)
+	nominate, unnominate, err := obj.Chooser.Choose(m, v)
+	if err != nil {
+		return errwrap.Wrapf(err, "chooser error")
+	}
+
+	// Ensure that we are the *last* in the list if we're unnominating, and
+	// the *first* in the list if we're nominating. This way, we self-remove
+	// last, and we self-add first. This is least likely to hurt quorum.
+	headFn := func(x string) bool {
+		return x != obj.Hostname
+	}
+	tailFn := func(x string) bool {
+		return x == obj.Hostname
+	}
+	nominate = util.PriorityStrSliceSort(nominate, headFn)
+	unnominate = util.PriorityStrSliceSort(unnominate, tailFn)
+	obj.Logf("chooser result(+/-): %+v/%+v", nominate, unnominate)
+	var reterr error
+	leaderCtx := ctx // default ctx to use
+	if RequireLeaderCtx {
+		leaderCtx = etcd.WithRequireLeader(ctx) // FIXME: Is this correct?
+	}
+
+	for i := range nominate {
+		member := nominate[i]
+		peerURLs, exists := obj.volunteers[member] // comma separated list of urls
+		if !exists {
+			// if this happens, do we have an update race?
+			return fmt.Errorf("could not find member `%s` in volunteers map", member)
+		}
+
+		// NOTE: storing peerURLs when they're already in volunteers/ is
+		// redundant, but it seems to be necessary for a sane algorithm.
+		// nominate before we call the API so that members see it first!
+		if err := obj.nominate(leaderCtx, member, peerURLs); err != nil {
+			return errwrap.Wrapf(err, "error nominating: %s", member)
+		}
+		// XXX: can we add a ttl here, because once we nominate someone,
+		// we need to give them up to N seconds to start up after we run
+		// the MemberAdd API because if they don't, in some situations
+		// such as if we're adding the second node to the cluster, then
+		// we've lost quorum until a second member joins! If the TTL
+		// expires, we need to MemberRemove! In this special case, we
+		// need to forcefully remove the second member if we don't add
+		// them, because we'll be in a lack of quorum state and unable
+		// to do anything... As a result, we should always only add ONE
+		// member at a time!
+
+		// XXX: After we memberAdd, can we wait a timeout, and then undo
+		// the add if the member doesn't come up? We'd also need to run
+		// an unnominate too, and mark the node as temporarily failed...
+		obj.Logf("member add: %s: %v", member, peerURLs)
+		resp, err := obj.memberAdd(leaderCtx, peerURLs)
+		if err != nil {
+			// FIXME: On error this function needs to run again,
+			// because we need to make sure to add the member here!
+			return errwrap.Wrapf(err, "member add error")
+		}
+		if resp != nil { // if we're already the right state, we get nil
+			obj.Logf("member added: %s (%d): %v", member, resp.Member.ID, peerURLs)
+			if err := obj.updateMemberState(resp.Members); err != nil {
+				return err
+			}
+			if resp.Member.Name == "" { // not started instantly ;)
+				obj.addMemberState(member, resp.Member.ID, peerURLs, nil)
+			}
+			// TODO: would this ever happen or be necessary?
+			//if member == obj.Hostname {
+			//	obj.addSelfState()
+			//}
+		}
+	}
+
+	// we must remove them from the members API or it will look like a crash
+	if l := len(unnominate); l > 0 {
+		obj.Logf("unnominated: shutting down %d members...", l)
+	}
+	for i := range unnominate {
+		member := unnominate[i]
+		memberID, exists := obj.memberIDs[member] // map[string]uint64
+		if !exists {
+			// if this happens, do we have an update race?
+			return fmt.Errorf("could not find member `%s` in memberIDs map", member)
+		}
+
+		// start a watcher to know if the member was removed
+		cancelCtx, cancel := context.WithCancel(leaderCtx)
+		defer cancel()
+		timeout := util.CloseAfter(cancelCtx, SelfRemoveTimeout) // chan closes
+		fn := func(members []*pb.Member) error {
+			for _, m := range members {
+				if m.Name == member || m.ID == memberID {
+					return fmt.Errorf("still present")
+				}
+			}
+
+			return nil // not found!
+		}
+		ch, err := obj.memberChange(cancelCtx, fn, MemberChangeInterval)
+		if err != nil {
+			return errwrap.Wrapf(err, "error watching for change of: %s", member)
+		}
+		if err := obj.nominate(leaderCtx, member, nil); err != nil { // unnominate
+			return errwrap.Wrapf(err, "error unnominating: %s", member)
+		}
+		// Once we issue the above unnominate, that peer will
+		// shutdown, and this might cause us to lose quorum,
+		// therefore, let that member remove itself, and then
+		// double check that it did happen in case delinquent.
+		// TODO: get built-in transactional member Add/Remove
+		// functionality to avoid a separate nominate list...
+
+		// If we're removing ourself, then let the (un)nominate callback
+		// do it. That way it removes itself cleanly on server shutdown.
+		if member == obj.Hostname { // remove in unnominate!
+			cancel()
+			obj.Logf("unnominate: removing self...")
+			continue
+		}
+
+		// cancel remove sleep and unblock early on event...
+		obj.Logf("waiting %s for %s to self remove...", SelfRemoveTimeout.String(), member)
+		select {
+		case <-timeout:
+			// pass
+		case err, ok := <-ch:
+			if ok {
+				select {
+				case <-timeout:
+					// wait until timeout finishes
+				}
+				reterr = errwrap.Append(reterr, err)
+			}
+			// removed quickly!
+		}
+		cancel()
+
+		// In case the removed member doesn't remove itself, do it!
+		resp, err := obj.memberRemove(leaderCtx, memberID)
+		if err != nil {
+			return errwrap.Wrapf(err, "member remove error")
+		}
+		if resp != nil {
+			obj.Logf("member removed (forced): %s (%d)", member, memberID)
+			if err := obj.updateMemberState(resp.Members); err != nil {
+				return err
+			}
+			// Do this I guess, but the TTL will eventually get it.
+			// Remove the other member to avoid client connections.
+			if err := obj.advertise(leaderCtx, member, nil); err != nil {
+				return err
+			}
+		}
+
+		// Remove the member from our lists to avoid blocking future
+		// possible MemberList calls which would try and connect to a
+		// missing member... The lists should get updated from the
+		// member exiting safely if it doesn't crash, but if it did
+		// and/or since it's a race to see if the update event will get
+		// seen before we need the new data, just do it now anyways.
+		// TODO: Is the above comment still true?
+		obj.rmMemberState(member) // proactively delete it
+
+		obj.Logf("member %s (%d) removed successfully!", member, memberID)
+	}
+
+	// NOTE: We could ensure that etcd reconnects here, but we can just wait
+	// for the endpoints callback which should see the state change instead.
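+	// Even so, syncing the endpoints proactively is harmless; at worst it
+	// is a no-op if the list did not actually change.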
+ + obj.setEndpoints() // sync client with new endpoints + return reterr +} diff --git a/etcd/chooser/chooser.go b/etcd/chooser/chooser.go new file mode 100644 index 00000000..09a22962 --- /dev/null +++ b/etcd/chooser/chooser.go @@ -0,0 +1,98 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package chooser + +import ( + "context" + + "github.com/purpleidea/mgmt/etcd/interfaces" + + etcdtypes "github.com/coreos/etcd/pkg/types" +) + +// Data represents the input data that is passed to the chooser. +type Data struct { + // Hostname is the hostname running this chooser instance. It can be + // used as a unique key in the cluster. + Hostname string // ourself + + Debug bool + Logf func(format string, v ...interface{}) +} + +// Chooser represents the interface you must implement if you want to be able to +// control which cluster members are added and removed. Remember that this can +// get run from any peer (server) machine in the cluster, and that this may +// change as different leaders are elected! Do not assume any state will remain +// between invocations. If you want to maintain hysteresis or state, make sure +// to synchronize it in etcd. +type Chooser interface { + // Validate validates the chooser implementation to ensure the params + // represent a valid instantiation. + Validate() error + + // Init initializes the chooser and passes in some useful data and + // handles. + Init(*Data) error + + // Connect will be called with a client interfaces.Client that you can + // use if necessary to store some shared state between instances of this + // and watch for external changes. Sharing state between members should + // be avoided if possible, and there is no guarantee that your data + // won't be deleted in a disaster. There are no backups for this, + // regenerate anything you might need. Additionally, this may only be + // used inside the Chooser method, since Connect is only called after + // Init. This is however very useful for implementing special choosers. + // Since some operations can run on connect, it gets a context. If you + // cancel this context, then you might expect that Watch could die too. + // Both of these should get cancelled if you call Disconnect. + Connect(context.Context, interfaces.Client) error // we get given a namespaced client + + // Disconnect tells us to cancel our use of the client interface that we + // got from the Connect method. We must not return until we're done. + Disconnect() error + + // Watch is called by the engine to allow us to Watch for changes that + // might cause us to want to re-evaluate our nomination decision. It + // should error if it cannot startup. Once it is running, it should send + // a nil error on every event, and an error if things go wrong. When + // Disconnect is shutdown, then that should cause this to exit. 
When + // this sends events, Choose will usually eventually get called in + // response. + Watch() (chan error, error) + + // Choose takes the current peer membership state, and the available + // volunteers, and produces a list of who we should add and who should + // quit. In general, it's best to only remove one member at a time, in + // particular because this will get called iteratively on future events, + // and it can remove subsequent members on the next iteration. One + // important note: when building a new cluster, we do assume that out of + // one available volunteer, and no members, that this first volunteer is + // selected. Make sure that any implementations of this function do this + // as well, since otherwise the hardcoded initial assumption would be + // proven wrong here! + // TODO: we could pass in two lists of hostnames instead of the full + // URLsMap here, but let's keep it more complicated now in case, and + // reduce it down later if needed... + // TODO: should we add a step arg here ? + Choose(membership, volunteers etcdtypes.URLsMap) (nominees, quitters []string, err error) + + // Close runs some cleanup routines in case there is anything that you'd + // like to free after we're done. + Close() error +} diff --git a/etcd/chooser/dynamicsize.go b/etcd/chooser/dynamicsize.go new file mode 100644 index 00000000..3f60f520 --- /dev/null +++ b/etcd/chooser/dynamicsize.go @@ -0,0 +1,285 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package chooser + +import ( + "context" + "fmt" + "strconv" + "sync" + + "github.com/purpleidea/mgmt/etcd/interfaces" + + etcd "github.com/coreos/etcd/clientv3" + etcdtypes "github.com/coreos/etcd/pkg/types" +) + +// XXX: Test causing cluster shutdowns with: +// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 0 +// It is currently broken. + +const ( + // DefaultIdealDynamicSize is the default target ideal dynamic cluster + // size used for the initial cluster. + DefaultIdealDynamicSize = 5 + + // IdealDynamicSizePath is the path key used for the chooser. It usually + // gets used with a namespace prefix. + IdealDynamicSizePath = "/dynamicsize/idealclustersize" +) + +// DynamicSize is a simple implementation of the Chooser interface. This helps +// select which machines to add and remove as we elastically grow and shrink our +// cluster. +// TODO: think of a better name +type DynamicSize struct { + // IdealClusterSize is the ideal target size for this cluster. If it is + // set to zero, then it will use DefaultIdealDynamicSize as the value. + IdealClusterSize uint16 + + data *Data // save for later + client interfaces.Client + + ctx context.Context + cancel func() + wg *sync.WaitGroup +} + +// Validate validates the struct. 
+func (obj *DynamicSize) Validate() error {
+	// NOTE: IdealClusterSize is a uint16, so it can never be negative. A
+	// zero value is replaced with DefaultIdealDynamicSize during Init.
+	// TODO: if changed to zero, treat as a cluster shutdown signal
+	return nil
+}
+
+// Init accepts some useful data and handles.
+func (obj *DynamicSize) Init(data *Data) error {
+	if data.Hostname == "" {
+		return fmt.Errorf("can't Init with empty Hostname value")
+	}
+	if data.Logf == nil {
+		return fmt.Errorf("no Logf function was specified")
+	}
+
+	if obj.IdealClusterSize == 0 {
+		obj.IdealClusterSize = DefaultIdealDynamicSize
+	}
+
+	obj.data = data
+	obj.wg = &sync.WaitGroup{}
+	return nil
+}
+
+// Close runs some cleanup routines.
+func (obj *DynamicSize) Close() error {
+	return nil
+}
+
+// Connect is called to accept an etcd.KV namespace that we can use.
+func (obj *DynamicSize) Connect(ctx context.Context, client interfaces.Client) error {
+	obj.client = client
+	obj.ctx, obj.cancel = context.WithCancel(ctx)
+	size, err := DynamicSizeGet(obj.ctx, obj.client)
+	if err == interfaces.ErrNotExist || (err == nil && size == 0) {
+		// unset, so set it in the running cluster
+		changed, err := DynamicSizeSet(obj.ctx, obj.client, obj.IdealClusterSize)
+		if err == nil && changed {
+			obj.data.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
+		}
+		return err
+	} else if err == nil && size >= 1 {
+		// already set, get from running cluster (use the valid cluster value)
+		if obj.IdealClusterSize != size {
+			obj.data.Logf("using dynamic cluster size of: %d", size)
+		}
+		obj.IdealClusterSize = size // get from existing cluster...
+	}
+
+	return err
+}
+
+// Disconnect is called to cancel our use of the etcd.KV connection.
+func (obj *DynamicSize) Disconnect() error {
+	if obj.client != nil { // if connect was not called, don't call this...
+		obj.cancel()
+	}
+	obj.wg.Wait()
+	return nil
+}
+
+// Watch is called to send events anytime we might want to change membership. It
+// is also used to watch for changes so that when we get an event, we know to
+// honour the change in Choose.
+func (obj *DynamicSize) Watch() (chan error, error) {
+	// NOTE: The body of this function is very similar to the logic in the
+	// simple client.Watcher implementation that wraps ComplexWatcher.
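+	// The loop below folds each batch of watch events into a single new
+	// size value, and it only sends an event out when that value actually
+	// changed from the previous one.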
+ path := IdealDynamicSizePath + cancelCtx, cancel := context.WithCancel(obj.ctx) + info, err := obj.client.ComplexWatcher(cancelCtx, path) + if err != nil { + defer cancel() + return nil, err + } + ch := make(chan error) + obj.wg.Add(1) // hook in to global wait group + go func() { + defer obj.wg.Done() + defer close(ch) + defer cancel() + var data *interfaces.WatcherData + var ok bool + for { + select { + case data, ok = <-info.Events: // read + if !ok { + return + } + case <-cancelCtx.Done(): + continue // wait for ch closure, but don't block + } + + size := obj.IdealClusterSize + for _, event := range data.Events { // apply each event + if event.Type != etcd.EventTypePut { + continue + } + key := string(event.Kv.Key) + key = key[len(data.Path):] // remove path prefix + val := string(event.Kv.Value) + if val == "" { + continue // ignore empty values + } + i, err := strconv.Atoi(val) + if err != nil { + continue // ignore bad values + } + size = uint16(i) // save + } + if size == obj.IdealClusterSize { + continue // no change + } + // set before sending the signal + obj.IdealClusterSize = size + + if size == 0 { // zero means shutdown + obj.data.Logf("impending cluster shutdown...") + } else { + obj.data.Logf("got new dynamic cluster size of: %d", size) + } + + select { + case ch <- data.Err: // send (might be nil!) + case <-cancelCtx.Done(): + continue // wait for ch closure, but don't block + } + } + }() + return ch, nil +} + +// Choose accepts a list of current membership, and a list of volunteers. From +// that we can decide who we should add and remove. We return a list of those +// nominated, and unnominated users respectively. +func (obj *DynamicSize) Choose(membership, volunteers etcdtypes.URLsMap) ([]string, []string, error) { + // Possible nominees include anyone that has volunteered, but that + // isn't a member. + if obj.data.Debug { + obj.data.Logf("goal: %d members", obj.IdealClusterSize) + } + nominees := []string{} + for hostname := range volunteers { + if _, exists := membership[hostname]; !exists { + nominees = append(nominees, hostname) + } + } + + // Possible quitters include anyone that is a member, but that is not a + // volunteer. (They must have unvolunteered.) + quitters := []string{} + for hostname := range membership { + if _, exists := volunteers[hostname]; !exists { + quitters = append(quitters, hostname) + } + } + + // What we want to know... + nominated := []string{} + unnominated := []string{} + + // We should always only add ONE member at a time! + // TODO: is it okay to remove multiple members at the same time? + if len(nominees) > 0 && len(membership)-len(quitters) < int(obj.IdealClusterSize) { + //unnominated = []string{} // only do one operation at a time + nominated = []string{nominees[0]} // FIXME: use a better picker algorithm + + } else if len(quitters) == 0 && len(membership) > int(obj.IdealClusterSize) { // too many members + //nominated = []string{} // only do one operation at a time + for kicked := range membership { + // don't kick ourself unless we are the only one left... + if kicked != obj.data.Hostname || (obj.IdealClusterSize == 0 && len(membership) == 1) { + unnominated = []string{kicked} // FIXME: use a better picker algorithm + break + } + } + } else if len(quitters) > 0 { // must do these before new unvolunteers + unnominated = quitters // get rid of the quitters + } + + return nominated, unnominated, nil // perform these changes +} + +// DynamicSizeGet gets the currently set dynamic size set in the cluster. 
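+// It returns interfaces.ErrNotExist if no value has been stored yet.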
+func DynamicSizeGet(ctx context.Context, client interfaces.Client) (uint16, error) { + key := IdealDynamicSizePath + m, err := client.Get(ctx, key) // (map[string]string, error) + if err != nil { + return 0, err + } + val, exists := m[IdealDynamicSizePath] + if !exists { + return 0, interfaces.ErrNotExist + } + i, err := strconv.Atoi(val) + if err != nil { + return 0, fmt.Errorf("bad value") + } + return uint16(i), nil +} + +// DynamicSizeSet sets the dynamic size in the cluster. It returns true if it +// changed or set the value. +func DynamicSizeSet(ctx context.Context, client interfaces.Client, size uint16) (bool, error) { + key := IdealDynamicSizePath + val := strconv.FormatUint(uint64(size), 10) // fmt.Sprintf("%d", size) + + ifCmps := []etcd.Cmp{ + etcd.Compare(etcd.Value(key), "=", val), // desired state + } + elseOps := []etcd.Op{etcd.OpPut(key, val)} + + resp, err := client.Txn(ctx, ifCmps, nil, elseOps) + if err != nil { + return false, err + } + // succeeded is set to true if the compare evaluated to true + changed := !resp.Succeeded + + return changed, err +} diff --git a/etcd/client.go b/etcd/client.go deleted file mode 100644 index e1d34839..00000000 --- a/etcd/client.go +++ /dev/null @@ -1,95 +0,0 @@ -// Mgmt -// Copyright (C) 2013-2019+ James Shubin and the project contributors -// Written by James Shubin and the project contributors -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -package etcd - -import ( - "time" - - "github.com/purpleidea/mgmt/util/errwrap" - - etcd "github.com/coreos/etcd/clientv3" // "clientv3" - context "golang.org/x/net/context" -) - -// ClientEtcd provides a simple etcd client for deploy and status operations. -type ClientEtcd struct { - Seeds []string // list of endpoints to try to connect - - client *etcd.Client -} - -// GetClient returns a handle to the raw etcd client object. -func (obj *ClientEtcd) GetClient() *etcd.Client { - return obj.client -} - -// GetConfig returns the config struct to be used for the etcd client connect. -func (obj *ClientEtcd) GetConfig() etcd.Config { - cfg := etcd.Config{ - Endpoints: obj.Seeds, - // RetryDialer chooses the next endpoint to use - // it comes with a default dialer if unspecified - DialTimeout: 5 * time.Second, - } - return cfg -} - -// Connect connects the client to a server, and then builds the *API structs. -// If reconnect is true, it will force a reconnect with new config endpoints. -func (obj *ClientEtcd) Connect() error { - if obj.client != nil { // memoize - return nil - } - - var err error - cfg := obj.GetConfig() - obj.client, err = etcd.New(cfg) // connect! - if err != nil { - return errwrap.Wrapf(err, "client connect error") - } - return nil -} - -// Destroy cleans up the entire etcd client connection. -func (obj *ClientEtcd) Destroy() error { - err := obj.client.Close() - //obj.wg.Wait() - return err -} - -// Get runs a get on the client connection. This has the same signature as our -// EmbdEtcd Get function. 
-func (obj *ClientEtcd) Get(path string, opts ...etcd.OpOption) (map[string]string, error) { - resp, err := obj.client.Get(context.TODO(), path, opts...) - if err != nil || resp == nil { - return nil, err - } - - // TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse - result := make(map[string]string) - for _, x := range resp.Kvs { - result[string(x.Key)] = string(x.Value) - } - return result, nil -} - -// Txn runs a transaction on the client connection. This has the same signature -// as our EmbdEtcd Txn function. -func (obj *ClientEtcd) Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) { - return obj.client.KV.Txn(context.TODO()).If(ifcmps...).Then(thenops...).Else(elseops...).Commit() -} diff --git a/etcd/resources.go b/etcd/client/resources/resources.go similarity index 67% rename from etcd/resources.go rename to etcd/client/resources/resources.go index c5c52883..b310dca8 100644 --- a/etcd/resources.go +++ b/etcd/client/resources/resources.go @@ -15,60 +15,43 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -package etcd +package resources import ( + "context" "fmt" - "log" "strings" "github.com/purpleidea/mgmt/engine" engineUtil "github.com/purpleidea/mgmt/engine/util" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util" etcd "github.com/coreos/etcd/clientv3" ) +const ( + ns = "" // in case we want to add one back in +) + // WatchResources returns a channel that outputs events when exported resources // change. // TODO: Filter our watch (on the server side if possible) based on the // collection prefixes and filters that we care about... -func WatchResources(obj *EmbdEtcd) chan error { - ch := make(chan error, 1) // buffer it so we can measure it - path := fmt.Sprintf("%s/exported/", NS) - callback := func(re *RE) error { - // TODO: is this even needed? it used to happen on conn errors - log.Printf("Etcd: Watch: Path: %v", path) // event - if re == nil || re.response.Canceled { - return fmt.Errorf("watch is empty") // will cause a CtxError+retry - } - // we normally need to check if anything changed since the last - // event, since a set (export) with no changes still causes the - // watcher to trigger and this would cause an infinite loop. we - // don't need to do this check anymore because we do the export - // transactionally, and only if a change is needed. since it is - // atomic, all the changes arrive together which avoids dupes!! - if len(ch) == 0 { // send event only if one isn't pending - // this check avoids multiple events all queueing up and then - // being released continuously long after the changes stopped - // do not block! - ch <- nil // event - } - return nil - } - _, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors - return ch +func WatchResources(ctx context.Context, client interfaces.Client) (chan error, error) { + path := fmt.Sprintf("%s/exported/", ns) + return client.Watcher(ctx, path, etcd.WithPrefix()) } // SetResources exports all of the resources which we pass in to etcd. 
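+// The whole export runs as a single atomic transaction, so watchers see at
+// most one event per call, and only when something actually changed.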
-func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) error { +func SetResources(ctx context.Context, client interfaces.Client, hostname string, resourceList []engine.Res) error { // key structure is $NS/exported/$hostname/resources/$uid = $data var kindFilter []string // empty to get from everyone hostnameFilter := []string{hostname} // this is not a race because we should only be reading keys which we // set, and there should not be any contention with other hosts here! - originals, err := GetResources(obj, hostnameFilter, kindFilter) + originals, err := GetResources(ctx, client, hostnameFilter, kindFilter) if err != nil { return err } @@ -81,10 +64,10 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err ops := []etcd.Op{} // list of ops in this transaction for _, res := range resourceList { if res.Kind() == "" { - log.Fatalf("Etcd: SetResources: Error: Empty kind: %v", res.Name()) + return fmt.Errorf("empty kind: %s", res.Name()) } uid := fmt.Sprintf("%s/%s", res.Kind(), res.Name()) - path := fmt.Sprintf("%s/exported/%s/resources/%s", NS, hostname, uid) + path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid) if data, err := engineUtil.ResToB64(res); err == nil { ifs = append(ifs, etcd.Compare(etcd.Value(path), "=", data)) // desired state ops = append(ops, etcd.OpPut(path, data)) @@ -106,10 +89,10 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err // delete old, now unused resources here... for _, res := range originals { if res.Kind() == "" { - log.Fatalf("Etcd: SetResources: Error: Empty kind: %v", res.Name()) + return fmt.Errorf("empty kind: %s", res.Name()) } uid := fmt.Sprintf("%s/%s", res.Kind(), res.Name()) - path := fmt.Sprintf("%s/exported/%s/resources/%s", NS, hostname, uid) + path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid) if match(res, resourceList) { // if we match, no need to delete! continue @@ -124,9 +107,9 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err // it's important to do this in one transaction, and atomically, because // this way, we only generate one watch event, and only when it's needed if hasDeletes { // always run, ifs don't matter - _, err = obj.Txn(nil, ops, nil) // TODO: does this run? it should! + _, err = client.Txn(ctx, nil, ops, nil) // TODO: does this run? it should! } else { - _, err = obj.Txn(ifs, nil, ops) // TODO: do we need to look at response? + _, err = client.Txn(ctx, ifs, nil, ops) // TODO: do we need to look at response? } return err } @@ -136,11 +119,11 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err // TODO: Expand this with a more powerful filter based on what we eventually // support in our collect DSL. Ideally a server side filter like WithFilter() // We could do this if the pattern was $NS/exported/$kind/$hostname/$uid = $data. 
-func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine.Res, error) { +func GetResources(ctx context.Context, client interfaces.Client, hostnameFilter, kindFilter []string) ([]engine.Res, error) { // key structure is $NS/exported/$hostname/resources/$uid = $data - path := fmt.Sprintf("%s/exported/", NS) + path := fmt.Sprintf("%s/exported/", ns) resourceList := []engine.Res{} - keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) + keyMap, err := client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) if err != nil { return nil, fmt.Errorf("could not get resources: %v", err) } @@ -160,7 +143,9 @@ func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine. if kind == "" { return nil, fmt.Errorf("unexpected kind chunk") } - + if name == "" { // TODO: should I check this? + return nil, fmt.Errorf("unexpected empty name") + } // FIXME: ideally this would be a server side filter instead! if len(hostnameFilter) > 0 && !util.StrInList(hostname, hostnameFilter) { continue @@ -171,9 +156,9 @@ func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine. continue } - if obj, err := engineUtil.B64ToRes(val); err == nil { - log.Printf("Etcd: Get: (Hostname, Kind, Name): (%s, %s, %s)", hostname, kind, name) - resourceList = append(resourceList, obj) + if res, err := engineUtil.B64ToRes(val); err == nil { + //obj.Logf("Get: (Hostname, Kind, Name): (%s, %s, %s)", hostname, kind, name) + resourceList = append(resourceList, res) } else { return nil, fmt.Errorf("can't convert from B64: %v", err) } diff --git a/etcd/client/simple.go b/etcd/client/simple.go new file mode 100644 index 00000000..f025b99b --- /dev/null +++ b/etcd/client/simple.go @@ -0,0 +1,484 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package client + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/purpleidea/mgmt/etcd/interfaces" + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" // "clientv3" + "github.com/coreos/etcd/clientv3/namespace" +) + +// method represents the method we used to build the simple client. +type method uint8 + +const ( + methodError method = iota + methodSeeds + methodClient + methodNamespace +) + +// NewClientFromSeeds builds a new simple client by connecting to a list of +// seeds. +func NewClientFromSeeds(seeds []string) *Simple { + return &Simple{ + method: methodSeeds, + wg: &sync.WaitGroup{}, + + seeds: seeds, + } +} + +// NewClientFromSeedsNamespace builds a new simple client by connecting to a +// list of seeds and ensuring all key access is prefixed with a namespace. 
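+//
+// For example (a sketch only; the endpoint and namespace are sample values):
+//
+//	c := NewClientFromSeedsNamespace(
+//		[]string{"http://127.0.0.1:2379"}, // seeds
+//		"/_mgmt",                          // ns
+//	)
+//	if err := c.Init(); err != nil {
+//		// could not connect
+//	}
+//	defer c.Close()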
+func NewClientFromSeedsNamespace(seeds []string, ns string) *Simple {
+	return &Simple{
+		method: methodSeeds,
+		wg:     &sync.WaitGroup{},
+
+		seeds:     seeds,
+		namespace: ns,
+	}
+}
+
+// NewClientFromClient builds a new simple client by taking an existing client
+// struct. It does not disconnect this when Close is called, as that is up to
+// the parent, which is the owner of that client input struct.
+func NewClientFromClient(client *etcd.Client) *Simple {
+	return &Simple{
+		method: methodClient,
+		wg:     &sync.WaitGroup{},
+
+		client: client,
+	}
+}
+
+// NewClientFromNamespaceStr builds a new simple client by taking an existing
+// client and a string namespace. Warning, this doesn't properly nest the
+// namespaces.
+func NewClientFromNamespaceStr(client *etcd.Client, ns string) *Simple {
+	if client == nil {
+		return &Simple{
+			method: methodError,
+			err:    fmt.Errorf("client is nil"),
+		}
+	}
+	kv := client.KV
+	w := client.Watcher
+	if ns != "" { // only layer if not empty
+		kv = namespace.NewKV(client.KV, ns)
+		w = namespace.NewWatcher(client.Watcher, ns)
+	}
+
+	return &Simple{
+		method: methodClient, // similar enough to this one to share it!
+		wg:     &sync.WaitGroup{},
+
+		client: client, // store for GetClient()
+		kv:     kv,
+		w:      w,
+	}
+}
+
+// NewClientFromSimple builds a simple client from an existing client interface
+// which must be a simple client. This awkward method is required so that
+// namespace nesting works properly, because the *etcd.Client doesn't directly
+// pass through the namespace. I'd love to nuke this function, but it's good
+// enough for now.
+func NewClientFromSimple(client interfaces.Client, ns string) *Simple {
+	if client == nil {
+		return &Simple{
+			method: methodError,
+			err:    fmt.Errorf("client is nil"),
+		}
+	}
+
+	simple, ok := client.(*Simple)
+	if !ok {
+		return &Simple{
+			method: methodError,
+			err:    fmt.Errorf("client is not simple"),
+		}
+	}
+	kv := simple.kv
+	w := simple.w
+	if ns != "" { // only layer if not empty
+		kv = namespace.NewKV(simple.kv, ns)
+		w = namespace.NewWatcher(simple.w, ns)
+	}
+
+	return &Simple{
+		method: methodNamespace,
+		wg:     &sync.WaitGroup{},
+
+		client: client.GetClient(), // store for GetClient()
+		kv:     kv,
+		w:      w,
+	}
+}
+
+// NewClientFromNamespace builds a new simple client by taking an existing set
+// of interface APIs that we might use.
+func NewClientFromNamespace(client *etcd.Client, kv etcd.KV, w etcd.Watcher) *Simple {
+	return &Simple{
+		method: methodNamespace,
+		wg:     &sync.WaitGroup{},
+
+		client: client, // store for GetClient()
+		kv:     kv,
+		w:      w,
+	}
+}
+
+// Simple provides a simple etcd client for deploy and status operations. You
+// can set Debug and Logf after you've built this with one of the NewClient*
+// methods.
+type Simple struct {
+	Debug bool
+	Logf  func(format string, v ...interface{})
+
+	method method
+	wg     *sync.WaitGroup
+
+	// err is the error we set when using methodError
+	err error
+
+	// seeds is the list of endpoints to try to connect to.
+	seeds     []string
+	namespace string
+
+	// client is the etcd client connection.
+	client *etcd.Client
+
+	// kv and w are the namespaced interfaces that we got passed.
+	kv etcd.KV
+	w  etcd.Watcher
+}
+
+// logf is a safe wrapper around the Logf parameter that doesn't panic if the
+// user didn't pass a logger in.
+func (obj *Simple) logf(format string, v ...interface{}) {
+	if obj.Logf == nil {
+		return
+	}
+	obj.Logf(format, v...)
+}
+
+// config returns the config struct to be used for the etcd client connect.
+func (obj *Simple) config() etcd.Config {
+	cfg := etcd.Config{
+		Endpoints: obj.seeds,
+		// RetryDialer chooses the next endpoint to use
+		// it comes with a default dialer if unspecified
+		DialTimeout: 5 * time.Second,
+	}
+	return cfg
+}
+
+// connect connects the client to a server, and then builds the *API structs.
+func (obj *Simple) connect() error {
+	if obj.client != nil { // memoize
+		return nil
+	}
+
+	var err error
+	cfg := obj.config()
+	obj.client, err = etcd.New(cfg) // connect!
+	if err != nil {
+		return errwrap.Wrapf(err, "client connect error")
+	}
+	obj.kv = obj.client.KV
+	obj.w = obj.client.Watcher
+	if obj.namespace != "" { // bonus feature of seeds method
+		obj.kv = namespace.NewKV(obj.client.KV, obj.namespace)
+		obj.w = namespace.NewWatcher(obj.client.Watcher, obj.namespace)
+	}
+	return nil
+}
+
+// Init starts up the struct.
+func (obj *Simple) Init() error {
+	// By the end of this, we must have obj.kv and obj.w available for use.
+	switch obj.method {
+	case methodError:
+		return obj.err // use the error we set
+
+	case methodSeeds:
+		if len(obj.seeds) <= 0 {
+			return fmt.Errorf("zero seeds")
+		}
+		return obj.connect()
+
+	case methodClient:
+		if obj.client == nil {
+			return fmt.Errorf("no client")
+		}
+		if obj.kv == nil { // overwrite if not specified!
+			obj.kv = obj.client.KV
+		}
+		if obj.w == nil {
+			obj.w = obj.client.Watcher
+		}
+		return nil
+
+	case methodNamespace:
+		if obj.kv == nil || obj.w == nil {
+			return fmt.Errorf("empty namespace")
+		}
+		return nil
+	}
+
+	return fmt.Errorf("unknown method: %+v", obj.method)
+}
+
+// Close cleans up the struct after we're finished.
+func (obj *Simple) Close() error {
+	defer obj.wg.Wait()
+	switch obj.method {
+	case methodError: // for consistency
+		return fmt.Errorf("did not Init")
+
+	case methodSeeds:
+		return obj.client.Close()
+
+	case methodClient:
+		// we were given a client, so we don't own it or close it
+		return nil
+
+	case methodNamespace:
+		return nil
+	}
+
+	return fmt.Errorf("unknown method: %+v", obj.method)
+}
+
+// GetClient returns a handle to an open etcd Client. This is needed for certain
+// upstream APIs that don't support passing in KV and Watcher instead.
+func (obj *Simple) GetClient() *etcd.Client {
+	return obj.client
+}
+
+// Set runs a set operation. If you'd like more information about whether a
+// value changed or not, use Txn instead.
+func (obj *Simple) Set(ctx context.Context, key, value string, opts ...etcd.OpOption) error {
+	// key is the full key path
+	resp, err := obj.kv.Put(ctx, key, value, opts...)
+	if obj.Debug {
+		obj.logf("set(%s): %v", key, resp) // bonus
+	}
+	return err
+}
+
+// Get runs a get operation.
+func (obj *Simple) Get(ctx context.Context, path string, opts ...etcd.OpOption) (map[string]string, error) {
+	resp, err := obj.kv.Get(ctx, path, opts...)
+	if err != nil {
+		return nil, err
+	}
+	if resp == nil {
+		return nil, fmt.Errorf("empty response")
+	}
+
+	// TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
+	result := make(map[string]string)
+	for _, x := range resp.Kvs {
+		result[string(x.Key)] = string(x.Value)
+	}
+	return result, nil
+}
+
+// Del runs a delete operation.
+func (obj *Simple) Del(ctx context.Context, path string, opts ...etcd.OpOption) (int64, error) {
+	resp, err := obj.kv.Delete(ctx, path, opts...)
+	if err == nil {
+		return resp.Deleted, nil
+	}
+	return -1, err
+}
+
+// Txn runs a transaction.
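+//
+// A compare-and-swap sketch (hypothetical key and values; assumes a context
+// ctx and an initialized client obj):
+//
+//	ifs := []etcd.Cmp{etcd.Compare(etcd.Value("/k"), "=", "v1")}
+//	ops := []etcd.Op{etcd.OpPut("/k", "v2")}
+//	resp, err := obj.Txn(ctx, ifs, ops, nil) // no else ops
+//	// on success, resp.Succeeded reports whether the ifs all matched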
+func (obj *Simple) Txn(ctx context.Context, ifCmps []etcd.Cmp, thenOps, elseOps []etcd.Op) (*etcd.TxnResponse, error) {
+	resp, err := obj.kv.Txn(ctx).If(ifCmps...).Then(thenOps...).Else(elseOps...).Commit()
+	if obj.Debug {
+		obj.logf("txn: %v", resp) // bonus
+	}
+	return resp, err
+}
+
+// Watcher is a watcher that returns a chan of errors instead of a chan with
+// all sorts of watcher data. This is useful when we only want an event signal,
+// but we don't care about the specifics.
+func (obj *Simple) Watcher(ctx context.Context, path string, opts ...etcd.OpOption) (chan error, error) {
+	cancelCtx, cancel := context.WithCancel(ctx)
+	info, err := obj.ComplexWatcher(cancelCtx, path, opts...)
+	if err != nil {
+		defer cancel()
+		return nil, err
+	}
+	ch := make(chan error)
+	obj.wg.Add(1) // hook in to global wait group
+	go func() {
+		defer obj.wg.Done()
+		defer close(ch)
+		defer cancel()
+		var data *interfaces.WatcherData
+		var ok bool
+		for {
+			select {
+			case data, ok = <-info.Events: // read
+				if !ok {
+					return
+				}
+			case <-cancelCtx.Done():
+				continue // wait for ch closure, but don't block
+			}
+
+			select {
+			case ch <- data.Err: // send (might be nil!)
+			case <-cancelCtx.Done():
+				continue // wait for ch closure, but don't block
+			}
+		}
+	}()
+	return ch, nil
+}
+
+// ComplexWatcher is a more capable watcher that also returns data information.
+// This starts a watch request. It writes on a channel that you can follow to
+// know when an event or an error occurs. It always sends one startup event. It
+// will not return until the watch has been started. If it cannot start, then it
+// will return an error. Remember to add the WithPrefix() option if you want to
+// watch recursively.
+// TODO: do we need to support retry and changed client connections?
+// XXX: do we need to track last successful revision and retry from there?
+// XXX: if so, use:
+// lastRev := response.Header.Revision // TODO: +1 ?
+// etcd.WithRev(rev)
+func (obj *Simple) ComplexWatcher(ctx context.Context, path string, opts ...etcd.OpOption) (*interfaces.WatcherInfo, error) {
+	if obj.client == nil { // catch bugs, this often means programming error
+		return nil, fmt.Errorf("client is nil") // extra safety!
+	}
+	cancelCtx, cancel := context.WithCancel(ctx)
+	eventsChan := make(chan *interfaces.WatcherData) // channel of watcher events (they can carry errors)
+
+	var count uint8
+	wg := &sync.WaitGroup{}
+
+	// TODO: if we can detect the use of WithCreatedNotify, we don't need to
+	// hard-code it down below... https://github.com/coreos/etcd/issues/9689
+	// XXX: proof of concept patch: https://github.com/coreos/etcd/pull/9705
+	//for _, op := range opts {
+	//	//if op.Cmp(etcd.WithCreatedNotify()) == nil { // would be best
+	//	if etcd.OpOptionCmp(op, etcd.WithCreatedNotify()) == nil {
+	//		count++
+	//		wg.Add(1)
+	//		break
+	//	}
+	//}
+	count++
+	wg.Add(1)
+
+	wOpts := []etcd.OpOption{
+		etcd.WithCreatedNotify(),
+	}
+	wOpts = append(wOpts, opts...)
+	var err error
+
+	obj.wg.Add(1) // hook in to global wait group
+	go func() {
+		defer obj.wg.Done()
+		defer close(eventsChan)
+		defer cancel() // it's safe to cancel() more than once!
+		ch := obj.w.Watch(cancelCtx, path, wOpts...)
+		for {
+			var resp etcd.WatchResponse
+			var ok bool
+			var created bool
+			select {
+			case resp, ok = <-ch:
+				if !ok {
+					if count > 0 { // closed before startup
+						// set err in parent scope!
+						err = fmt.Errorf("watch closed")
+						count--
+						wg.Done()
+					}
+					return
+				}
+
+				// the watch is now running!
+ if count > 0 && resp.Created { + created = true + count-- + wg.Done() + } + + isCanceled := resp.Canceled || resp.Err() == context.Canceled + // TODO: this might not be needed + if resp.Header.Revision == 0 { // by inspection + if obj.Debug { + obj.logf("watch: received empty message") // switched client connection + } + isCanceled = true + } + + if isCanceled { + data := &interfaces.WatcherData{ + Err: context.Canceled, + } + select { // send the error + case eventsChan <- data: + case <-ctx.Done(): + return + } + continue // channel should close shortly + } + } + + // TODO: consider processing the response data into a + // more useful form for the callback... + data := &interfaces.WatcherData{ + Created: created, + Path: path, + Header: resp.Header, + Events: resp.Events, + Err: resp.Err(), + } + + select { // send the event + case eventsChan <- data: + case <-ctx.Done(): + return + } + } + }() + + wg.Wait() // wait for created event before we return + + return &interfaces.WatcherInfo{ + Cancel: cancel, + Events: eventsChan, + }, err +} diff --git a/etcd/str.go b/etcd/client/str/str.go similarity index 66% rename from etcd/str.go rename to etcd/client/str/str.go index 0b5fb740..26408ece 100644 --- a/etcd/str.go +++ b/etcd/client/str/str.go @@ -15,20 +15,22 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -package etcd +package str import ( - "errors" + "context" "fmt" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" ) -// ErrNotExist is returned when GetStr can not find the requested key. -// TODO: https://dave.cheney.net/2016/04/07/constant-errors -var ErrNotExist = errors.New("errNotExist") +const ( + ns = "" // in case we want to add one back in +) // WatchStr returns a channel which spits out events on key activity. // FIXME: It should close the channel when it's done, and spit out errors when @@ -37,37 +39,23 @@ var ErrNotExist = errors.New("errNotExist") // done, does that mean we leak go-routines since it might still be running, but // perhaps even blocked??? Could this cause a dead-lock? Should we instead return // some sort of struct which has a close method with it to ask for a shutdown? -func WatchStr(obj *EmbdEtcd, key string) chan error { +func WatchStr(ctx context.Context, client interfaces.Client, key string) (chan error, error) { // new key structure is $NS/strings/$key = $data - path := fmt.Sprintf("%s/strings/%s", NS, key) - ch := make(chan error, 1) - // FIXME: fix our API so that we get a close event on shutdown. - callback := func(re *RE) error { - // TODO: is this even needed? it used to happen on conn errors - //log.Printf("Etcd: Watch: Path: %v", path) // event - if re == nil || re.response.Canceled { - return fmt.Errorf("watch is empty") // will cause a CtxError+retry - } - if len(ch) == 0 { // send event only if one isn't pending - ch <- nil // event - } - return nil - } - _, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors - return ch + path := fmt.Sprintf("%s/strings/%s", ns, key) + return client.Watcher(ctx, path) } // GetStr collects the string which matches a global namespace in etcd. 
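+//
+// A call sketch (the key name is hypothetical; assumes a context ctx and an
+// initialized interfaces.Client named client):
+//
+//	s, err := GetStr(ctx, client, "mykey")
+//	if err == interfaces.ErrNotExist {
+//		// the key has not been set yet
+//	}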
-func GetStr(obj *EmbdEtcd, key string) (string, error) { +func GetStr(ctx context.Context, client interfaces.Client, key string) (string, error) { // new key structure is $NS/strings/$key = $data - path := fmt.Sprintf("%s/strings/%s", NS, key) - keyMap, err := obj.Get(path, etcd.WithPrefix()) + path := fmt.Sprintf("%s/strings/%s", ns, key) + keyMap, err := client.Get(ctx, path, etcd.WithPrefix()) if err != nil { return "", errwrap.Wrapf(err, "could not get strings in: %s", key) } if len(keyMap) == 0 { - return "", ErrNotExist + return "", interfaces.ErrNotExist } if count := len(keyMap); count != 1 { @@ -79,23 +67,21 @@ func GetStr(obj *EmbdEtcd, key string) (string, error) { return "", fmt.Errorf("path `%s` is missing", path) } - //log.Printf("Etcd: GetStr(%s): %s", key, val) return val, nil } // SetStr sets a key and hostname pair to a certain value. If the value is // nil, then it deletes the key. Otherwise the value should point to a string. // TODO: TTL or delete disconnect? -func SetStr(obj *EmbdEtcd, key string, data *string) error { +func SetStr(ctx context.Context, client interfaces.Client, key string, data *string) error { // key structure is $NS/strings/$key = $data - path := fmt.Sprintf("%s/strings/%s", NS, key) + path := fmt.Sprintf("%s/strings/%s", ns, key) ifs := []etcd.Cmp{} // list matching the desired state ops := []etcd.Op{} // list of ops in this transaction (then) els := []etcd.Op{} // list of ops in this transaction (else) if data == nil { // perform a delete - // TODO: use https://github.com/coreos/etcd/pull/7417 if merged - //ifs = append(ifs, etcd.KeyExists(path)) - ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0)) + ifs = append(ifs, etcdutil.KeyExists(path)) + //ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0)) ops = append(ops, etcd.OpDelete(path)) } else { data := *data // get the real value @@ -105,6 +91,6 @@ func SetStr(obj *EmbdEtcd, key string, data *string) error { // it's important to do this in one transaction, and atomically, because // this way, we only generate one watch event, and only when it's needed - _, err := obj.Txn(ifs, ops, els) // TODO: do we need to look at response? + _, err := client.Txn(ctx, ifs, ops, els) // TODO: do we need to look at response? return errwrap.Wrapf(err, "could not set strings in: %s", key) } diff --git a/etcd/strmap.go b/etcd/client/strmap/strmap.go similarity index 71% rename from etcd/strmap.go rename to etcd/client/strmap/strmap.go index 9d92ffac..91ae7e3e 100644 --- a/etcd/strmap.go +++ b/etcd/client/strmap/strmap.go @@ -15,50 +15,43 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -package etcd +package strmap import ( + "context" "fmt" "strings" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util" "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" +) + +const ( + ns = "" // in case we want to add one back in ) // WatchStrMap returns a channel which spits out events on key activity. // FIXME: It should close the channel when it's done, and spit out errors when // something goes wrong. 
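+//
+// A rough consumer sketch (hypothetical key; assumes a context ctx and an
+// initialized interfaces.Client named client):
+//
+//	ch, err := WatchStrMap(ctx, client, "mykey")
+//	if err != nil {
+//		return err
+//	}
+//	for e := range ch {
+//		if e != nil {
+//			return e // watch error
+//		}
+//		// key activity happened; re-read the values with GetStrMap
+//	}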
-func WatchStrMap(obj *EmbdEtcd, key string) chan error { +func WatchStrMap(ctx context.Context, client interfaces.Client, key string) (chan error, error) { // new key structure is $NS/strings/$key/$hostname = $data - path := fmt.Sprintf("%s/strings/%s", NS, key) - ch := make(chan error, 1) - // FIXME: fix our API so that we get a close event on shutdown. - callback := func(re *RE) error { - // TODO: is this even needed? it used to happen on conn errors - //log.Printf("Etcd: Watch: Path: %v", path) // event - if re == nil || re.response.Canceled { - return fmt.Errorf("watch is empty") // will cause a CtxError+retry - } - if len(ch) == 0 { // send event only if one isn't pending - ch <- nil // event - } - return nil - } - _, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors - return ch + path := fmt.Sprintf("%s/strings/%s", ns, key) + return client.Watcher(ctx, path, etcd.WithPrefix()) } // GetStrMap collects all of the strings which match a namespace in etcd. -func GetStrMap(obj *EmbdEtcd, hostnameFilter []string, key string) (map[string]string, error) { +func GetStrMap(ctx context.Context, client interfaces.Client, hostnameFilter []string, key string) (map[string]string, error) { // old key structure is $NS/strings/$hostname/$key = $data // new key structure is $NS/strings/$key/$hostname = $data // FIXME: if we have the $key as the last token (old key structure), we // can allow the key to contain the slash char, otherwise we need to // verify that one isn't present in the input string. - path := fmt.Sprintf("%s/strings/%s", NS, key) - keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) + path := fmt.Sprintf("%s/strings/%s", ns, key) + keyMap, err := client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) if err != nil { return nil, errwrap.Wrapf(err, "could not get strings in: %s", key) } @@ -91,16 +84,15 @@ func GetStrMap(obj *EmbdEtcd, hostnameFilter []string, key string) (map[string]s // SetStrMap sets a key and hostname pair to a certain value. If the value is // nil, then it deletes the key. Otherwise the value should point to a string. // TODO: TTL or delete disconnect? -func SetStrMap(obj *EmbdEtcd, hostname, key string, data *string) error { +func SetStrMap(ctx context.Context, client interfaces.Client, hostname, key string, data *string) error { // key structure is $NS/strings/$key/$hostname = $data - path := fmt.Sprintf("%s/strings/%s/%s", NS, key, hostname) + path := fmt.Sprintf("%s/strings/%s/%s", ns, key, hostname) ifs := []etcd.Cmp{} // list matching the desired state ops := []etcd.Op{} // list of ops in this transaction (then) els := []etcd.Op{} // list of ops in this transaction (else) if data == nil { // perform a delete - // TODO: use https://github.com/coreos/etcd/pull/7417 if merged - //ifs = append(ifs, etcd.KeyExists(path)) - ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0)) + ifs = append(ifs, etcdutil.KeyExists(path)) + //ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0)) ops = append(ops, etcd.OpDelete(path)) } else { data := *data // get the real value @@ -110,6 +102,6 @@ func SetStrMap(obj *EmbdEtcd, hostname, key string, data *string) error { // it's important to do this in one transaction, and atomically, because // this way, we only generate one watch event, and only when it's needed - _, err := obj.Txn(ifs, ops, els) // TODO: do we need to look at response? 
+ _, err := client.Txn(ctx, ifs, ops, els) // TODO: do we need to look at response? return errwrap.Wrapf(err, "could not set strings in: %s", key) } diff --git a/etcd/converger.go b/etcd/converger.go new file mode 100644 index 00000000..b4d72ae7 --- /dev/null +++ b/etcd/converger.go @@ -0,0 +1,49 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "context" + "fmt" + + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" +) + +// setHostnameConverged sets whether a specific hostname is converged. +func (obj *EmbdEtcd) setHostnameConverged(ctx context.Context, hostname string, isConverged bool) error { + if obj.Debug { + obj.Logf("setHostnameConverged(%s): %t", hostname, isConverged) + defer obj.Logf("setHostnameConverged(%s): done!", hostname) + } + + key := fmt.Sprintf(obj.NS+convergedPathFmt, hostname) + data := fmt.Sprintf("%t", isConverged) + + // XXX: bug: https://github.com/etcd-io/etcd/issues/10566 + // XXX: reverse things with els to workaround the bug :( + //ifs := []etcd.Cmp{etcd.Compare(etcd.Value(key), "!=", data)} // desired state + //ops := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(obj.leaseID))} + ifs := []etcd.Cmp{etcd.Compare(etcd.Value(key), "=", data)} // desired state + ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID)) + els := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(obj.leaseID))} + + _, err := obj.client.Txn(ctx, ifs, nil, els) + return errwrap.Wrapf(err, "set hostname converged failed") +} diff --git a/etcd/deploy.go b/etcd/deployer/deployer.go similarity index 64% rename from etcd/deploy.go rename to etcd/deployer/deployer.go index b9c3fccc..90523807 100644 --- a/etcd/deploy.go +++ b/etcd/deployer/deployer.go @@ -15,16 +15,20 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -package etcd +package deployer import ( + "context" "fmt" "strconv" "strings" + "sync" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" ) const ( @@ -33,34 +37,52 @@ const ( hashPath = "hash" ) -// WatchDeploy returns a channel which spits out events on new deploy activity. -// FIXME: It should close the channel when it's done, and spit out errors when -// something goes wrong. -func WatchDeploy(obj *EmbdEtcd) chan error { - // key structure is $NS/deploy/$id/payload = $data - path := fmt.Sprintf("%s/%s/", NS, deployPath) - ch := make(chan error, 1) - // FIXME: fix our API so that we get a close event on shutdown. - callback := func(re *RE) error { - // TODO: is this even needed? 
it used to happen on conn errors - //log.Printf("Etcd: Watch: Path: %v", path) // event - if re == nil || re.response.Canceled { - return fmt.Errorf("watch is empty") // will cause a CtxError+retry - } - if len(ch) == 0 { // send event only if one isn't pending - ch <- nil // event - } - return nil +// SimpleDeploy is a deploy struct that provides all of the needed deploy +// methods. It requires that you give it a Client interface so that it can +// perform its remote work. You must call Init before you use it, and Close when +// you are done. +type SimpleDeploy struct { + Client interfaces.Client + + Debug bool + Logf func(format string, v ...interface{}) + + ns string // TODO: if we ever need to hardcode a base path + wg *sync.WaitGroup +} + +// Init validates the deploy structure and prepares it for first use. +func (obj *SimpleDeploy) Init() error { + if obj.Client == nil { + return fmt.Errorf("the Client was not specified") } - _, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors - return ch + obj.wg = &sync.WaitGroup{} + return nil +} + +// Close cleans up after using the deploy struct and waits for any ongoing +// watches to exit before it returns. +func (obj *SimpleDeploy) Close() error { + obj.wg.Wait() + return nil +} + +// WatchDeploy returns a channel which spits out events on new deploy activity. +// It closes the channel when it's done, and spits out errors when something +// goes wrong. If it can't start up, it errors immediately. The returned channel +// is buffered, so that a quick succession of events will get discarded. +func (obj *SimpleDeploy) WatchDeploy(ctx context.Context) (chan error, error) { + // key structure is $NS/deploy/$id/payload = $data + path := fmt.Sprintf("%s/%s/", obj.ns, deployPath) + // FIXME: obj.wg.Add(1) && obj.wg.Done() + return obj.Client.Watcher(ctx, path, etcd.WithPrefix()) } // GetDeploys gets all the available deploys. -func GetDeploys(obj Client) (map[uint64]string, error) { +func (obj *SimpleDeploy) GetDeploys(ctx context.Context) (map[uint64]string, error) { // key structure is $NS/deploy/$id/payload = $data - path := fmt.Sprintf("%s/%s/", NS, deployPath) - keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) + path := fmt.Sprintf("%s/%s/", obj.ns, deployPath) + keyMap, err := obj.Client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) if err != nil { return nil, errwrap.Wrapf(err, "could not get deploy") } @@ -86,7 +108,7 @@ func GetDeploys(obj Client) (map[uint64]string, error) { } // TODO: do some sort of filtering here? - //log.Printf("Etcd: GetDeploys(%s): Id => Data: %d => %s", key, id, val) + //obj.Logf("GetDeploys(%s): Id => Data: %d => %s", key, id, val) result[id] = val } return result, nil @@ -107,8 +129,8 @@ func calculateMax(deploys map[uint64]string) uint64 { // an id of 0, you'll get back an empty deploy without error. This is useful so // that you can pass through this function easily. // FIXME: implement this more efficiently so that it doesn't have to download *all* the old deploys from etcd! -func GetDeploy(obj Client, id uint64) (string, error) { - result, err := GetDeploys(obj) +func (obj *SimpleDeploy) GetDeploy(ctx context.Context, id uint64) (string, error) { + result, err := obj.GetDeploys(ctx) if err != nil { return "", err } @@ -130,9 +152,9 @@ func GetDeploy(obj Client, id uint64) (string, error) { // zero. You must increment the returned value by one when you add a deploy. 
If // two or more clients race for this deploy id, then the loser is not committed, // and must repeat this GetMaxDeployID process until it succeeds with a commit! -func GetMaxDeployID(obj Client) (uint64, error) { +func (obj *SimpleDeploy) GetMaxDeployID(ctx context.Context) (uint64, error) { // TODO: this was all implemented super inefficiently, fix up for perf! - deploys, err := GetDeploys(obj) // get previous deploys + deploys, err := obj.GetDeploys(ctx) // get previous deploys if err != nil { return 0, errwrap.Wrapf(err, "error getting previous deploys") } @@ -148,29 +170,28 @@ func GetMaxDeployID(obj Client) (uint64, error) { // contributors pushing conflicting deploys. This isn't git specific, and so any // arbitrary string hash can be used. // FIXME: prune old deploys from the store when they aren't needed anymore... -func AddDeploy(obj Client, id uint64, hash, pHash string, data *string) error { +func (obj *SimpleDeploy) AddDeploy(ctx context.Context, id uint64, hash, pHash string, data *string) error { // key structure is $NS/deploy/$id/payload = $data // key structure is $NS/deploy/$id/hash = $hash - path := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id, payloadPath) - tPath := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id, hashPath) + path := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id, payloadPath) + tPath := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id, hashPath) ifs := []etcd.Cmp{} // list matching the desired state ops := []etcd.Op{} // list of ops in this transaction (then) - // TODO: use https://github.com/coreos/etcd/pull/7417 if merged // we're append only, so ensure this unique deploy id doesn't exist - ifs = append(ifs, etcd.Compare(etcd.Version(path), "=", 0)) // KeyMissing - //ifs = append(ifs, etcd.KeyMissing(path)) + //ifs = append(ifs, etcd.Compare(etcd.Version(path), "=", 0)) // KeyMissing + ifs = append(ifs, etcdutil.KeyMissing(path)) // don't look for previous deploy if this is the first deploy ever if id > 1 { // we append sequentially, so ensure previous key *does* exist - prev := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id-1, payloadPath) - ifs = append(ifs, etcd.Compare(etcd.Version(prev), ">", 0)) // KeyExists - //ifs = append(ifs, etcd.KeyExists(prev)) + prev := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id-1, payloadPath) + //ifs = append(ifs, etcd.Compare(etcd.Version(prev), ">", 0)) // KeyExists + ifs = append(ifs, etcdutil.KeyExists(prev)) if hash != "" && pHash != "" { // does the previously stored hash match what we expect? 
- prevHash := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id-1, hashPath) + prevHash := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id-1, hashPath) ifs = append(ifs, etcd.Compare(etcd.Value(prevHash), "=", pHash)) } } @@ -182,7 +203,7 @@ func AddDeploy(obj Client, id uint64, hash, pHash string, data *string) error { // it's important to do this in one transaction, and atomically, because // this way, we only generate one watch event, and only when it's needed - result, err := obj.Txn(ifs, ops, nil) + result, err := obj.Client.Txn(ctx, ifs, ops, nil) if err != nil { return errwrap.Wrapf(err, "error creating deploy id %d", id) } diff --git a/etcd/interfaces.go b/etcd/error.go similarity index 68% rename from etcd/interfaces.go rename to etcd/error.go index 71bf049f..dca8ef87 100644 --- a/etcd/interfaces.go +++ b/etcd/error.go @@ -18,13 +18,10 @@ package etcd import ( - etcd "github.com/coreos/etcd/clientv3" // "clientv3" + "github.com/purpleidea/mgmt/etcd/interfaces" ) -// Client provides a simple interface specification for client requests. Both -// EmbdEtcd and ClientEtcd implement this. -type Client interface { - // TODO: add more method signatures - Get(path string, opts ...etcd.OpOption) (map[string]string, error) - Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) -} +const ( + // errInconsistentApply means applyDeltaEvents wasn't consistent. + errInconsistentApply = interfaces.Error("inconsistent apply") +) diff --git a/etcd/etcd.go b/etcd/etcd.go index 6042781f..9a081906 100644 --- a/etcd/etcd.go +++ b/etcd/etcd.go @@ -15,81 +15,165 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -// TODO: Add TTL's (eg: volunteering) -// TODO: Remove race around leader operations -// TODO: Fix server reuse issue (bind: address already in use) -// TODO: Fix unstarted member -// TODO: Fix excessive StartLoop/FinishLoop -// TODO: Add VIP for servers (incorporate with net resource) -// TODO: Auto assign ports/ip's for peers (if possible) -// TODO: Fix godoc +// TODO: remove race around leader operations +// TODO: fix unstarted member +// TODO: add VIP for servers (incorporate with net resource) +// TODO: auto assign ports/ip's for peers (if possible) +// TODO: check the shutdown ordering, so everything unrolls to a shutdown +// TODO: add the converger Register/Unregister stuff and timers if needed -// Package etcd implements the distributed key value store integration. -// This also takes care of managing and clustering the embedded etcd server. -// The elastic etcd algorithm works in the following way: -// * When you start up mgmt, you can pass it a list of seeds. -// * If no seeds are given, then assume you are the first server and startup. -// * If a seed is given, connect as a client, and optionally volunteer to be a server. -// * All volunteering clients should listen for a message from the master for nomination. -// * If a client has been nominated, it should startup a server. -// * All servers should listen for their nomination to be removed and shutdown if so. -// * The elected leader should decide who to nominate/unnominate to keep the right number of servers. +// Package etcd implements the distributed key value store and fs integration. +// This also takes care of managing and clustering of the embedded etcd server. +// The automatic clustering is considered experimental. 
If you require a more
+// robust, battle-tested etcd cluster, then manage your own, and point each mgmt
+// agent at it with --seeds and --no-server.
 //
-// Smoke testing:
-// mkdir /tmp/mgmt{A..E}
-// ./mgmt run --hostname h1 --tmp-prefix --no-pgp yaml --yaml examples/yaml/etcd1a.yaml
-// ./mgmt run --hostname h2 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 yaml --yaml examples/yaml/etcd1b.yaml
-// ./mgmt run --hostname h3 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 yaml --yaml examples/yaml/etcd1c.yaml
-// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/idealClusterSize 3
-// ./mgmt run --hostname h4 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 yaml --yaml examples/yaml/etcd1d.yaml
-// ./mgmt run --hostname h5 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2387 --server-urls http://127.0.0.1:2388 yaml --yaml examples/yaml/etcd1e.yaml
-// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 member list
-// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 put /_mgmt/idealClusterSize 5
-// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 member list
+// Algorithm
+//
+// The elastic etcd algorithm works in the following way:
+//
+// * When you start up mgmt, you can pass it a list of seeds.
+//
+// * If no seeds are given, then assume you are the first server and start up.
+//
+// * If a seed is given, connect as a client, and volunteer to be a server.
+//
+// * All volunteering clients should listen for a nomination message.
+//
+// * If a client has been nominated, it should start up a server.
+//
+// * A server should shut down if its nomination is removed.
+//
+// * The elected leader should decide who to nominate/unnominate as needed.
+//
+// Notes
+//
+// If you attempt to add a new member to the cluster with a duplicate hostname,
+// then the behaviour is undefined, and you could bork your cluster. This is not
+// recommended or supported. Please ensure that your hostnames are unique.
+//
+// A single ^C requests an orderly shutdown, however a third ^C will ask etcd to
+// shut down forcefully. It is not recommended that you use this option; it
+// exists as a way to make exit easier if something deadlocked the cluster. If
+// this was due to user error (eg: duplicate hostnames) then it was your fault,
+// but if the member did not shut down from a single ^C under normal
+// circumstances, then please file a bug.
+//
+// There are currently some races in this implementation. In practice, this
+// should not cause any adverse effects unless you simultaneously add or remove
+// members at a high rate. Fixing these races will probably require some
+// internal changes to etcd. Help is welcome if you're interested in working on
+// this.
+//
+// Smoke testing
+//
+// Here is a simple way to test etcd clustering basics...
+//
+// ./mgmt run --tmp-prefix --no-pgp --hostname h1 empty
+// ./mgmt run --tmp-prefix --no-pgp --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 empty
+// ./mgmt run --tmp-prefix --no-pgp --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 empty
+// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 3
+// ./mgmt run --tmp-prefix --no-pgp --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 empty
+// ./mgmt run --tmp-prefix --no-pgp --hostname h5 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2387 --server-urls http://127.0.0.1:2388 empty
+// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 member list
+// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 put /_mgmt/chooser/dynamicsize/idealclustersize 5
+// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 member list
+//
+// Bugs
+//
+// A member might occasionally think that an endpoint still exists after it has
+// already shut down. This isn't a major issue, since if that endpoint doesn't
+// respond, then it will automatically choose the next available one. To see
+// this issue, turn on debugging and start: H1, H2, H3, then stop H2, and you
+// might see that H3 still knows about H2.
+//
+// Shutting down a cluster by setting the idealclustersize to zero is currently
+// buggy and not supported. Try this at your own risk.
+//
+// If a member is nominated, and it doesn't respond to the nominate event and
+// start up, and we lost quorum to add it, then we could be in a blocked state.
+// This can be improved upon if we can call memberRemove after a timeout.
+//
+// Adding new cluster members very quickly might trigger a:
+// `runtime error: error validating peerURLs ... member count is unequal` error.
+// See: https://github.com/etcd-io/etcd/issues/10626 for more information.
+//
+// If you use the dynamic size feature to start and stop the server process,
+// once it has already started and then stopped, it can't be re-started because
+// of a bug in etcd that doesn't free the port. Instead you'll get a:
+// `bind: address already in use` error. See:
+// https://github.com/etcd-io/etcd/issues/6042 for more information.
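+//
+// External cluster
+//
+// As mentioned above, you can also skip the embedded server and point every
+// mgmt agent at an etcd cluster that you manage yourself. A sketch of such an
+// invocation (the endpoint here is just an example value) would be:
+//
+//	./mgmt run --tmp-prefix --no-pgp --hostname h1 --no-server --seeds http://127.0.0.1:2379 empty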
package etcd import ( - "bytes" - "errors" + "context" "fmt" - "log" - "math" "net/url" "os" - "path" "sort" - "strconv" "strings" "sync" "time" "github.com/purpleidea/mgmt/converger" - "github.com/purpleidea/mgmt/etcd/event" + "github.com/purpleidea/mgmt/etcd/chooser" + "github.com/purpleidea/mgmt/etcd/client" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util" + "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" // "clientv3" + "github.com/coreos/etcd/clientv3/concurrency" + "github.com/coreos/etcd/clientv3/namespace" "github.com/coreos/etcd/embed" - "github.com/coreos/etcd/etcdserver" - rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" etcdtypes "github.com/coreos/etcd/pkg/types" - raft "github.com/coreos/etcd/raft" - context "golang.org/x/net/context" - "google.golang.org/grpc" ) -// constant parameters which may need to be tweaked or customized const ( - NS = "/_mgmt" // root namespace for mgmt operations - seedSentinel = "_seed" // you must not name your hostname this - MaxStartServerTimeout = 60 // max number of seconds to wait for server to start - MaxStartServerRetries = 3 // number of times to retry starting the etcd server - maxClientConnectRetries = 5 // number of times to retry consecutive connect failures - selfRemoveTimeout = 3 // give unnominated members a chance to self exit - exitDelay = 3 // number of sec of inactivity after exit to clean up - DefaultIdealClusterSize = 5 // default ideal cluster size target for initial seed + // TODO: figure out a trailing slash convention... + // NominatedPath is the unprefixed path under which nominated hosts are + // stored. This is public so that other consumers can know to avoid this + // key prefix. + NominatedPath = "/nominated/" + nominatedPathFmt = NominatedPath + "%s" // takes a hostname on the end + + // VolunteerPath is the unprefixed path under which volunteering hosts + // are stored. This is public so that other consumers can know to avoid + // this key prefix. + VolunteerPath = "/volunteer/" + volunteerPathFmt = VolunteerPath + "%s" // takes a hostname on the end + + // EndpointsPath is the unprefixed path under which the advertised host + // endpoints are stored. This is public so that other consumers can know + // to avoid this key prefix. + EndpointsPath = "/endpoints/" + endpointsPathFmt = EndpointsPath + "%s" // takes a hostname on the end + + // ChooserPath is the unprefixed path under which the chooser algorithm + // may store data. This is public so that other consumers can know to + // avoid this key prefix. + ChooserPath = "/chooser" // all hosts share the same namespace + + // ConvergedPath is the unprefixed path under which the converger + // may store data. This is public so that other consumers can know to + // avoid this key prefix. + ConvergedPath = "/converged/" + convergedPathFmt = ConvergedPath + "%s" // takes a hostname on the end + + // SchedulerPath is the unprefixed path under which the scheduler + // may store data. This is public so that other consumers can know to + // avoid this key prefix. + SchedulerPath = "/scheduler/" + schedulerPathFmt = SchedulerPath + "%s" // takes a namespace on the end + + // DefaultClientURL is the default value that is used for client URLs. + // It is pulled from the upstream etcd package. DefaultClientURL = embed.DefaultListenClientURLs // 127.0.0.1:2379 - DefaultServerURL = embed.DefaultListenPeerURLs // 127.0.0.1:2380 + + // DefaultServerURL is the default value that is used for server URLs. 
+ // It is pulled from the upstream etcd package. + DefaultServerURL = embed.DefaultListenPeerURLs // 127.0.0.1:2380 // DefaultMaxTxnOps is the maximum number of operations to run in a // single etcd transaction. If you exceed this limit, it is possible @@ -98,1777 +182,1247 @@ const ( // know so that we can analyze the situation, and increase this if // necessary. DefaultMaxTxnOps = 512 + + // RunStartupTimeout is the amount of time we will wait for regular run + // startup before cancelling it all. + RunStartupTimeout = 30 * time.Second + + // ClientDialTimeout is the DialTimeout option in the client config. + ClientDialTimeout = 5 * time.Second + + // ClientDialKeepAliveTime is the DialKeepAliveTime config value for the + // etcd client. It is recommended that you use this so that dead + // endpoints don't block any cluster operations. + ClientDialKeepAliveTime = 2 * time.Second // from etcdctl + // ClientDialKeepAliveTimeout is the DialKeepAliveTimeout config value + // for the etcd client. It is recommended that you use this so that dead + // endpoints don't block any cluster operations. + ClientDialKeepAliveTimeout = 6 * time.Second // from etcdctl + + // MemberChangeInterval is the polling interval to use when watching for + // member changes during add or remove. + MemberChangeInterval = 500 * time.Millisecond + + // SelfRemoveTimeout gives unnominated members a chance to self exit. + SelfRemoveTimeout = 10 * time.Second + + // ForceExitTimeout is the amount of time we will wait for a force exit + // to occur before cancelling it all. + ForceExitTimeout = 15 * time.Second + + // SessionTTL is the number of seconds to wait before a dead or + // unresponsive host has their volunteer keys removed from the cluster. + // This should be an integer multiple of seconds, since one second is + // the TTL precision used in etcd. + SessionTTL = 10 * time.Second // seconds + + // RequireLeaderCtx specifies whether the volunteer loop should use the + // WithRequireLeader ctx wrapper. It is unknown at this time if this + // would cause occasional events to be lost, more extensive testing is + // needed. + RequireLeaderCtx = false + + // ConvergerHostnameNamespace is a unique key used in the converger. + ConvergerHostnameNamespace = "etcd-hostname" ) -var ( - errApplyDeltaEventsInconsistent = errors.New("inconsistent key in ApplyDeltaEvents") -) - -// AW is a struct for the AddWatcher queue. -type AW struct { - path string - opts []etcd.OpOption - callback func(*RE) error - errCheck bool - skipConv bool // ask event to skip converger updates - resp event.Resp - cancelFunc func() // data -} - -// RE is a response + error struct since these two values often occur together. -// This is now called an event with the move to the etcd v3 API. -type RE struct { - response etcd.WatchResponse - path string - err error - callback func(*RE) error - errCheck bool // should we check the error of the callback? - skipConv bool // event skips converger updates - retryHint bool // set to true for one event after a watcher failure - retries uint // number of times we've retried on error -} - -// KV is a key + value struct to hold the two items together. -type KV struct { - key string - value string - opts []etcd.OpOption - resp event.Resp -} - -// GQ is a struct for the get queue. -type GQ struct { - path string - skipConv bool - opts []etcd.OpOption - resp event.Resp - data map[string]string -} - -// DL is a struct for the delete queue. 
-type DL struct {
-	path string
-	opts []etcd.OpOption
-	resp event.Resp
-	data int64
-}
-
-// TN is a struct for the txn queue.
-type TN struct {
-	ifcmps  []etcd.Cmp
-	thenops []etcd.Op
-	elseops []etcd.Op
-	resp    event.Resp
-	data    *etcd.TxnResponse
-}
-
-// Flags are some constant flags which are used throughout the program.
-type Flags struct {
-	Debug   bool // add additional log messages
-	Trace   bool // add execution flow log messages
-	Verbose bool // add extra log message output
-}
-
 // EmbdEtcd provides the embedded server and client etcd functionality.
 type EmbdEtcd struct { // EMBeddeD etcd
-	// etcd client connection related
-	cLock  sync.Mutex   // client connect lock
-	rLock  sync.RWMutex // client reconnect lock
-	client *etcd.Client
-	cError error // permanent client error
-	ctxErr error // permanent ctx error
+	Hostname string
 
-	// exit and cleanup related
-	cancelLock sync.Mutex // lock for the cancels list
-	cancels    []func()   // array of every cancel function for watches
-	exiting    bool
-	exitchan   chan struct{}
-	exitchanCb chan struct{}
-	exitwg     *sync.WaitGroup // wait for main loops to shutdown
+	// Seeds is the list of servers that this client could connect to.
+	Seeds etcdtypes.URLs
 
-	hostname            string
-	memberID            uint64            // cluster membership id of server if running
-	endpoints           etcdtypes.URLsMap // map of servers a client could connect to
-	clientURLs          etcdtypes.URLs    // locations to listen for clients if i am a server
-	serverURLs          etcdtypes.URLs    // locations to listen for servers if i am a server (peer)
-	advertiseClientURLs etcdtypes.URLs    // client urls to advertise
-	advertiseServerURLs etcdtypes.URLs    // server urls to advertise
-	noServer            bool              // disable all server peering if true
-	noNetwork           bool              // use unix:// sockets instead of TCP for clients/servers
+	// ClientURLs are the locations to listen for clients if i am a server.
+	ClientURLs etcdtypes.URLs
+	// ServerURLs are the locations to listen for servers (peers) if i am a
+	// server (peer).
+	ServerURLs etcdtypes.URLs
+	// AClientURLs are the client urls to advertise.
+	AClientURLs etcdtypes.URLs
+	// AServerURLs are the server (peer) urls to advertise.
+	AServerURLs etcdtypes.URLs
 
-	// local tracked state
-	nominated        etcdtypes.URLsMap // copy of who's nominated to locally track state
-	lastRevision     int64             // the revision id of message being processed
-	idealClusterSize uint16            // ideal cluster size
+	// NoServer disables all server peering for this host.
+	// TODO: allow changing this at runtime with some function call?
+	NoServer bool
+	// NoNetwork causes this to use unix:// sockets instead of TCP for
+	// connections.
+	NoNetwork bool
 
-	// etcd channels
-	awq     chan *AW // add watch queue
-	wevents chan *RE // response+error
-	setq    chan *KV // set queue
-	getq    chan *GQ // get queue
-	delq    chan *DL // delete queue
-	txnq    chan *TN // txn queue
+	// Chooser is the implementation of the algorithm that decides which
+	// hosts to add or remove to grow and shrink the cluster.
+	Chooser chooser.Chooser
 
-	flags     Flags
-	prefix    string                 // folder prefix to use for misc storage
-	converger *converger.Coordinator // converged tracking
+	// Converger is a converged coordinator object that can be used to
+	// track the converged state.
+ Converger *converger.Coordinator - // etcd server related - serverwg sync.WaitGroup // wait for server to shutdown - server *embed.Etcd // technically this contains the server struct - dataDir string // our data dir, prefix + "etcd" - serverReady chan struct{} // closes when ready + // NS is a string namespace that we prefix to every key operation. + NS string + + // Prefix is the directory where any etcd related state is stored. It + // must be an absolute directory path. + Prefix string + + Debug bool + Logf func(format string, v ...interface{}) + + wg *sync.WaitGroup + exit *util.EasyExit // exit signal + closing bool // are we closing ? + hardexit *util.EasyExit // hard exit signal (to unblock borked things) + + errChan chan error // global error chan, closes when Run is done + + // errExit1 ... errExitN all must get closed for errChan to close. + errExit1 chan struct{} // connect + errExit2 chan struct{} // chooser + errExit3 chan struct{} // nominate + errExit4 chan struct{} // volunteer + errExit5 chan struct{} // endpoints + errExitN chan struct{} // special signal for server closing (starts/stops) + + // coordinate an organized exit so we wait for everyone without blocking + activeExit1 bool + activeExit2 bool + activeExit3 bool + activeExit4 bool + activeExit5 bool + activateExit1 *util.EasyAckOnce + activateExit2 *util.EasyAckOnce + activateExit3 *util.EasyAckOnce + activateExit4 *util.EasyAckOnce + activateExit5 *util.EasyAckOnce + + readySignal chan struct{} // closes when we're up and running + exitsSignal chan struct{} // closes when run exits + + // locally tracked state + + // nominated is a local cache of who's been nominated. This contains + // values for where a *server* would connect to. It gets updated + // primarily in the nominateCb watcher loop. + // TODO: maybe this should just be a list? + // TODO: is there a difference here between ServerURLs and AServerURLs ? + nominated etcdtypes.URLsMap // map[hostname]URLs + + // volunteers is a local cache of who's volunteered. This contains + // values for where a *server* would connect to. It gets updated + // primarily in the volunteerCb watcher loop. + // TODO: maybe this should just be a list? + // TODO: is there a difference here between ServerURLs and AServerURLs ? + volunteers etcdtypes.URLsMap // map[hostname]URLs + + // membermap is a local cache of server endpoints. This contains values + // for where a *server* (peer) would connect to. It gets updated in the + // membership state functions. + membermap etcdtypes.URLsMap // map[hostname]URLs + + // endpoints is a local cache of server endpoints. It differs from the + // config value which is a flattened representation of the same. That + // value can be seen via client.Endpoints() and client.SetEndpoints(). + // This contains values for where a *client* would connect to. It gets + // updated in the membership state functions. + endpoints etcdtypes.URLsMap // map[hostname]URLs + + // memberIDs is a local cache of which cluster servers (peers) are + // associated with each memberID. It gets updated in the membership + // state functions. Note that unstarted members have an ID, but no name + // yet, so they aren't included here, since that key would be the empty + // string. 
+ memberIDs map[string]uint64 // map[hostname]memberID + + // behaviour mutexes + stateMutex *sync.RWMutex // lock around all locally tracked state + orderingMutex *sync.Mutex // lock around non-concurrent changes + nominatedMutex *sync.Mutex // lock around nominatedCb + volunteerMutex *sync.Mutex // lock around volunteerCb + + // client related + etcd *etcd.Client + connectSignal chan struct{} // TODO: use a SubscribedSignal instead? + client *client.Simple // provides useful helper methods + clients []*client.Simple // list of registered clients + session *concurrency.Session // session that expires on disconnect + leaseID etcd.LeaseID // the leaseID used by this session + + // server related + server *embed.Etcd // contains the server struct + serverID uint64 // uint64 because memberRemove uses that + serverwg *sync.WaitGroup // wait for server to shutdown + servermu *sync.Mutex // lock around destroy server + serverExit *util.EasyExit // exit signal + serverReadySignal *util.SubscribedSignal // signals when server is up and running + serverExitsSignal *util.SubscribedSignal // signals when runServer exits + + // task queue state + taskQueue []*task + taskQueueWg *sync.WaitGroup + taskQueueLock *sync.Mutex + taskQueueRunning bool + taskQueueID int } -// NewEmbdEtcd creates the top level embedded etcd struct client and server obj. -func NewEmbdEtcd(hostname string, seeds, clientURLs, serverURLs, advertiseClientURLs, advertiseServerURLs etcdtypes.URLs, noServer bool, noNetwork bool, idealClusterSize uint16, flags Flags, prefix string, converger *converger.Coordinator) *EmbdEtcd { - endpoints := make(etcdtypes.URLsMap) - if hostname == seedSentinel { // safety - return nil - } - if noServer && len(seeds) == 0 { - log.Printf("Etcd: need at least one seed if running with --no-server!") - return nil - } - if noNetwork { - if len(clientURLs) != 0 || len(serverURLs) != 0 || len(seeds) != 0 { - log.Printf("--no-network is mutual exclusive with --seeds, --client-urls and --server-urls") - return nil - } - clientURLs, _ = etcdtypes.NewURLs([]string{"unix://clients.sock:0"}) - serverURLs, _ = etcdtypes.NewURLs([]string{"unix://servers.sock:0"}) - } - - if len(seeds) > 0 { - endpoints[seedSentinel] = seeds - idealClusterSize = 0 // unset, get from running cluster - } - obj := &EmbdEtcd{ - exitchan: make(chan struct{}), // exit signal for main loop - exitchanCb: make(chan struct{}), - exitwg: &sync.WaitGroup{}, - awq: make(chan *AW), - wevents: make(chan *RE), - setq: make(chan *KV), - getq: make(chan *GQ), - delq: make(chan *DL), - txnq: make(chan *TN), - - nominated: make(etcdtypes.URLsMap), - - hostname: hostname, - endpoints: endpoints, - clientURLs: clientURLs, - serverURLs: serverURLs, - advertiseClientURLs: advertiseClientURLs, - advertiseServerURLs: advertiseServerURLs, - noServer: noServer, - noNetwork: noNetwork, - - idealClusterSize: idealClusterSize, - converger: converger, - flags: flags, - prefix: prefix, - dataDir: path.Join(prefix, "etcd"), - serverReady: make(chan struct{}), - } - // TODO: add some sort of auto assign method for picking these defaults - // add a default so that our local client can connect locally if needed - if len(obj.LocalhostClientURLs()) == 0 { // if we don't have any localhost URLs - u, err := url.Parse(DefaultClientURL) - if err != nil { - return nil // TODO: change interface to return an error - } - obj.clientURLs = append([]url.URL{*u}, obj.clientURLs...) // prepend - } - - // add a default for local use and testing, harmless and useful! 
- if !obj.noServer && len(obj.serverURLs) == 0 { - if len(obj.endpoints) > 0 { - obj.noServer = true // we didn't have enough to be a server - } - u, err := url.Parse(DefaultServerURL) // default - if err != nil { - return nil // TODO: change interface to return an error - } - obj.serverURLs = []url.URL{*u} - } - - if converger != nil { - converger.AddStateFn("etcd-hostname", func(converged bool) error { - // send our individual state into etcd for others to see - return SetHostnameConverged(obj, hostname, converged) // TODO: what should happen on error? - }) - } - - return obj +// sessionTTLSec transforms the time representation into the nearest number of +// seconds, which is needed by the etcd API. +func sessionTTLSec(d time.Duration) int { + return int(d.Seconds()) } -// GetClient returns a handle to the raw etcd client object for those scenarios. -func (obj *EmbdEtcd) GetClient() *etcd.Client { - return obj.client -} +// Validate the initial struct. This is called from Init, but can be used if you +// would like to check your configuration is correct. +func (obj *EmbdEtcd) Validate() error { + s := sessionTTLSec(SessionTTL) + if s <= 0 { + return fmt.Errorf("the SessionTTL const of %s (%d sec) must be greater than zero", SessionTTL.String(), s) + } + if s > etcd.MaxLeaseTTL { + return fmt.Errorf("the SessionTTL const of %s (%d sec) must be less than %d sec", SessionTTL.String(), s, etcd.MaxLeaseTTL) + } -// GetConfig returns the config struct to be used for the etcd client connect. -func (obj *EmbdEtcd) GetConfig() etcd.Config { - endpoints := []string{} - // XXX: filter out any urls which wouldn't resolve here ? - for _, eps := range obj.endpoints { // flatten map - for _, u := range eps { - endpoints = append(endpoints, u.String()) // use full url including scheme - } + if obj.Hostname == "" { + return fmt.Errorf("the Hostname was not specified") } - sort.Strings(endpoints) // sort for determinism - cfg := etcd.Config{ - Endpoints: endpoints, - // RetryDialer chooses the next endpoint to use - // it comes with a default dialer if unspecified - DialTimeout: 5 * time.Second, - } - return cfg -} -// Connect connects the client to a server, and then builds the *API structs. -// If reconnect is true, it will force a reconnect with new config endpoints. -func (obj *EmbdEtcd) Connect(reconnect bool) error { - if obj.flags.Debug { - log.Println("Etcd: Connect...") + if obj.NoServer && len(obj.Seeds) == 0 { + return fmt.Errorf("need at least one seed if NoServer is true") } - obj.cLock.Lock() - defer obj.cLock.Unlock() - if obj.cError != nil { // stop on permanent error - return obj.cError - } - if obj.client != nil { // memoize - if reconnect { - // i think this requires the rLock when using it concurrently - err := obj.client.Close() - if err != nil { - log.Printf("Etcd: (Re)Connect: Close: Error: %+v", err) - } - obj.client = nil // for kicks - } else { - return nil + + if !obj.NoServer { // you don't need a Chooser if there's no server... 
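+		// The Chooser is the pluggable strategy that decides which
+		// volunteers get nominated as servers. The methods we rely on
+		// in this file look roughly like this (a sketch; see the
+		// chooser package for the authoritative definition):
+		//
+		//	Validate() error
+		//	Init(*chooser.Data) error
+		//	Connect(context.Context, interfaces.Client) error
+		//	Watch() (chan error, error)
+		//	Disconnect() error
+		//	Close() error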
+ if obj.Chooser == nil { + return fmt.Errorf("need to specify a Chooser implementation") + } + if err := obj.Chooser.Validate(); err != nil { + return errwrap.Wrapf(err, "the Chooser did not validate") } } - var emax uint16 // = 0 - for { // loop until connect - var err error - cfg := obj.GetConfig() - if eps := obj.endpoints; len(eps) > 0 { - log.Printf("Etcd: Connect: Endpoints: %v", eps) - } else { - log.Printf("Etcd: Connect: Endpoints: []") + + if obj.NoNetwork { + if len(obj.Seeds) != 0 || len(obj.ClientURLs) != 0 || len(obj.ServerURLs) != 0 { + return fmt.Errorf("NoNetwork is mutually exclusive with Seeds, ClientURLs and ServerURLs") } - obj.client, err = etcd.New(cfg) // connect! - if err == etcd.ErrNoAvailableEndpoints { - emax++ - if emax > maxClientConnectRetries { - log.Printf("Etcd: The dataDir (%s) might be inconsistent or corrupt.", obj.dataDir) - log.Printf("Etcd: Please see: %s", "https://github.com/purpleidea/mgmt/blob/master/docs/faq.md#what-does-the-error-message-about-an-inconsistent-datadir-mean") - obj.cError = fmt.Errorf("can't find an available endpoint") - return obj.cError - } - err = &CtxDelayErr{time.Duration(emax) * time.Second, "No endpoints available yet!"} // retry with backoff... - } - if err != nil { - log.Printf("Etcd: Connect: CtxError...") - if _, e := obj.CtxError(context.TODO(), err); e != nil { - log.Printf("Etcd: Connect: CtxError: Fatal: %v", e) - obj.cError = e - return e // fatal error - } - continue - } - // check if we're actually connected here, because this must - // block if we're not connected - if obj.client == nil { - log.Printf("Etcd: Connect: Is nil!") - continue - } - break } + + if _, err := copyURLs(obj.Seeds); err != nil { // this will validate + return errwrap.Wrapf(err, "the Seeds are not valid") + } + + if obj.NS == "/" { + return fmt.Errorf("the namespace should be empty instead of /") + } + if strings.HasSuffix(obj.NS, "/") { + return fmt.Errorf("the namespace should not end in /") + } + + if obj.Prefix == "" || obj.Prefix == "/" { + return fmt.Errorf("the prefix of `%s` is invalid", obj.Prefix) + } + + if obj.Logf == nil { + return fmt.Errorf("no Logf function was specified") + } + return nil } -// Startup is the main entry point to kick off the embedded etcd client & server. -func (obj *EmbdEtcd) Startup() error { - bootstrapping := len(obj.endpoints) == 0 // because value changes after start +// Init initializes the struct after it has been populated as desired. You must +// not use the struct if this returns an error. +func (obj *EmbdEtcd) Init() error { + if err := obj.Validate(); err != nil { + return errwrap.Wrapf(err, "validate error") + } - // connect but don't block here, because servers might not be up yet... + if obj.ClientURLs == nil { + obj.ClientURLs = []url.URL{} // initialize + } + if obj.ServerURLs == nil { + obj.ServerURLs = []url.URL{} + } + if obj.AClientURLs == nil { + obj.AClientURLs = []url.URL{} + } + if obj.AServerURLs == nil { + obj.AServerURLs = []url.URL{} + } + + curls, err := obj.curls() + if err != nil { + return err + } + surls, err := obj.surls() + if err != nil { + return err + } + if !obj.NoServer { + // add a default + if len(curls) == 0 { + u, err := url.Parse(DefaultClientURL) + if err != nil { + return err + } + obj.ClientURLs = []url.URL{*u} + } + // add a default for local use and testing, harmless and useful! 
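+		// (The Default*URL constants are expected to use the standard
+		// etcd ports, ie: something like http://127.0.0.1:2379 for
+		// clients and http://127.0.0.1:2380 for peers; check the
+		// constants in this package for the authoritative values.)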
+ if len(surls) == 0 { + u, err := url.Parse(DefaultServerURL) // default + if err != nil { + return err + } + obj.ServerURLs = []url.URL{*u} + } + + // TODO: if we don't have any localhost URLs, should we warn so + // that our local client can be able to connect more easily? + if len(localhostURLs(obj.ClientURLs)) == 0 { + u, err := url.Parse(DefaultClientURL) + if err != nil { + return err + } + obj.ClientURLs = append([]url.URL{*u}, obj.ClientURLs...) // prepend + } + } + + if obj.NoNetwork { + var err error + // FIXME: convince etcd to store these files in our obj.Prefix! + obj.ClientURLs, err = etcdtypes.NewURLs([]string{"unix://clients.sock:0"}) + if err != nil { + return err + } + obj.ServerURLs, err = etcdtypes.NewURLs([]string{"unix://servers.sock:0"}) + if err != nil { + return err + } + } + + if obj.Chooser != nil { + data := &chooser.Data{ + Hostname: obj.Hostname, + Debug: obj.Debug, + Logf: func(format string, v ...interface{}) { + obj.Logf("chooser: "+format, v...) + }, + } + if err := obj.Chooser.Init(data); err != nil { + return errwrap.Wrapf(err, "error initializing chooser") + } + } + + if err := os.MkdirAll(obj.Prefix, 0770); err != nil { + return errwrap.Wrapf(err, "couldn't mkdir: %s", obj.Prefix) + } + + obj.wg = &sync.WaitGroup{} + obj.exit = util.NewEasyExit() + obj.hardexit = util.NewEasyExit() + + obj.errChan = make(chan error) + + obj.errExit1 = make(chan struct{}) + obj.errExit2 = make(chan struct{}) + obj.errExit3 = make(chan struct{}) + obj.errExit4 = make(chan struct{}) + obj.errExit5 = make(chan struct{}) + obj.errExitN = make(chan struct{}) // done before call to runServer! + close(obj.errExitN) // starts closed + + //obj.activeExit1 = false + //obj.activeExit2 = false + //obj.activeExit3 = false + //obj.activeExit4 = false + //obj.activeExit5 = false + obj.activateExit1 = util.NewEasyAckOnce() + obj.activateExit2 = util.NewEasyAckOnce() + obj.activateExit3 = util.NewEasyAckOnce() + obj.activateExit4 = util.NewEasyAckOnce() + obj.activateExit5 = util.NewEasyAckOnce() + + obj.readySignal = make(chan struct{}) + obj.exitsSignal = make(chan struct{}) + + // locally tracked state + obj.nominated = make(etcdtypes.URLsMap) + obj.volunteers = make(etcdtypes.URLsMap) + obj.membermap = make(etcdtypes.URLsMap) + obj.endpoints = make(etcdtypes.URLsMap) + obj.memberIDs = make(map[string]uint64) + + // behaviour mutexes + obj.stateMutex = &sync.RWMutex{} + // TODO: I'm not sure if orderingMutex is actually required or not... + obj.orderingMutex = &sync.Mutex{} + obj.nominatedMutex = &sync.Mutex{} + obj.volunteerMutex = &sync.Mutex{} + + // client related + obj.connectSignal = make(chan struct{}) + obj.clients = []*client.Simple{} + + // server related + obj.serverwg = &sync.WaitGroup{} + obj.servermu = &sync.Mutex{} + obj.serverExit = util.NewEasyExit() // is reset after destroyServer exit + obj.serverReadySignal = &util.SubscribedSignal{} + obj.serverExitsSignal = &util.SubscribedSignal{} + + // task queue state + obj.taskQueue = []*task{} + obj.taskQueueWg = &sync.WaitGroup{} + obj.taskQueueLock = &sync.Mutex{} + + return nil +} + +// Close cleans up after you are done using the struct. +func (obj *EmbdEtcd) Close() error { + var reterr error + + if obj.Chooser != nil { + reterr = errwrap.Append(reterr, obj.Chooser.Close()) + } + + return reterr +} + +// curls returns the client urls that we should use everywhere except for +// locally, where we prefer to use the non-advertised perspective. 
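+// For example, if ClientURLs contains http://127.0.0.1:2379 and AClientURLs
+// contains http://192.0.2.42:2379 (an illustrative, made-up address) then we
+// tell peers about the latter, while dialing the former ourselves.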
+func (obj *EmbdEtcd) curls() (etcdtypes.URLs, error) { + // TODO: do we need the copy? + if len(obj.AClientURLs) > 0 { + return copyURLs(obj.AClientURLs) + } + return copyURLs(obj.ClientURLs) +} + +// surls returns the server (peer) urls that we should use everywhere except for +// locally, where we prefer to use the non-advertised perspective. +func (obj *EmbdEtcd) surls() (etcdtypes.URLs, error) { + // TODO: do we need the copy? + if len(obj.AServerURLs) > 0 { + return copyURLs(obj.AServerURLs) + } + return copyURLs(obj.ServerURLs) +} + +// err is an error helper that sends to the errChan. +func (obj *EmbdEtcd) err(err error) { + select { + case obj.errChan <- err: + } +} + +// Run is the main entry point to kick off the embedded etcd client and server. +// It blocks until we've exited for shutdown. The shutdown can be triggered by +// calling Destroy. +func (obj *EmbdEtcd) Run() error { + curls, err := obj.curls() + if err != nil { + return err + } + surls, err := obj.surls() + if err != nil { + return err + } + + exitCtx := obj.exit.Context() // local exit signal + obj.Logf("running...") + defer obj.Logf("exited!") + wg := &sync.WaitGroup{} + defer wg.Wait() + defer close(obj.exitsSignal) + defer obj.wg.Wait() + defer obj.exit.Done(nil) // unblock anything waiting for exit... + startupCtx, cancel := context.WithTimeout(exitCtx, RunStartupTimeout) + defer cancel() + defer obj.Logf("waiting for exit cleanup...") // TODO: is this useful? + + // After we trigger a hardexit, wait for the ForceExitTimeout and then + // cancel any remaining stuck context's. This helps prevent angry users. + unblockCtx, runTimeout := context.WithCancel(context.Background()) + defer runTimeout() + wg.Add(1) go func() { - if err := obj.Connect(false); err != nil { - log.Printf("Etcd: Startup: Error: %v", err) - // XXX: Now cause Startup() to exit with error somehow! + defer wg.Done() + defer runTimeout() + select { + case <-obj.hardexit.Signal(): // bork unblocker + case <-obj.exitsSignal: + } + + select { + case <-time.After(ForceExitTimeout): + case <-obj.exitsSignal: } }() - go obj.CbLoop() // start callback loop - go obj.Loop() // start main loop + // main loop exit signal + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + // when all the senders on errChan have exited, we can exit too + defer close(obj.errChan) + // these signals guard around the errChan close operation + wg := &sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + // We wait here until we're notified to know whether or + // not this particular exit signal will be relevant... + // This is because during some runs, we might not use + // all of the signals, therefore we don't want to wait + // for them! 
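+			// In other words: each errExitN channel is paired
+			// with an activateExitN ack and an activeExitN bool.
+			// Each branch of Run acks once it knows whether it
+			// will ever close its errExit channel, and only then
+			// do we decide if it's worth blocking on it here.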
+ select { + case <-obj.activateExit1.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit1 { + return + } + select { + case <-obj.errExit1: + if obj.Debug { + obj.Logf("exited connect loop (1)") + } + } + }() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.activateExit2.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit2 { + return + } + select { + case <-obj.errExit2: + if obj.Debug { + obj.Logf("exited chooser loop (2)") + } + } + }() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.activateExit3.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit3 { + return + } + select { + case <-obj.errExit3: + if obj.Debug { + obj.Logf("exited nominate loop (3)") - // TODO: implement native etcd watcher method on member API changes - path := fmt.Sprintf("%s/nominated/", NS) - go obj.AddWatcher(path, obj.nominateCallback, true, false, etcd.WithPrefix()) // no block + } + } + }() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.activateExit4.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit4 { + return + } + select { + case <-obj.errExit4: + if obj.Debug { + obj.Logf("exited volunteer loop (4)") + } + } + }() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.activateExit5.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit5 { + return + } + select { + case <-obj.errExit5: + if obj.Debug { + obj.Logf("exited endpoints loop (5)") + } + } + }() + wg.Wait() // wait for all the other exit signals before this one + select { + case <-obj.errExitN: // last one is for server (it can start/stop) + if obj.Debug { + obj.Logf("exited server loop (0)") + } + } + }() - // setup ideal cluster size watcher - key := fmt.Sprintf("%s/idealClusterSize", NS) - go obj.AddWatcher(key, obj.idealClusterSizeCallback, true, false) // no block + // main loop + var reterr error + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + Loop: + for { + select { + case err, ok := <-obj.errChan: + if !ok { // when this closes, we can shutdown + break Loop + } + if err == nil { + err = fmt.Errorf("unexpected nil error") + } + obj.Logf("runtime error: %+v", err) + if reterr == nil { // happens only once + obj.exit.Done(err) // trigger an exit in Run! + } + reterr = errwrap.Append(reterr, err) + } + } + }() - // if we have no endpoints, it means we are bootstrapping... - if !bootstrapping { - log.Println("Etcd: Startup: Getting initial values...") - if nominated, err := Nominated(obj); err == nil { - obj.nominated = nominated // store a local copy - } else { - log.Printf("Etcd: Startup: Nominate lookup error.") - obj.Destroy() - return fmt.Errorf("Etcd: Startup: Error: %v", err) + bootstrapping := len(obj.Seeds) == 0 // we're the first, start a server! + canServer := !obj.NoServer + + // Opportunistic "connect events" system, so that we can connect + // promiscuously when it's needed, instead of needing to linearize code. + obj.activeExit1 = true // activate errExit1 + obj.activateExit1.Ack() + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + defer close(obj.errExit1) // multi-signal for errChan close op + if bootstrapping { + serverReady, ackReady := obj.ServerReady() // must call ack! + serverExited, ackExited := obj.ServerExited() // must call ack! + select { + case <-serverReady: + ackReady() // must be called + ackExited() // must be called + + case <-serverExited: + ackExited() // must be called + ackReady() // must be called + // send an error in case server doesn't + // TODO: do we want this error to be sent? 
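+				// Here the server exited before it ever became
+				// ready, so surface that as a startup error.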
+ obj.err(fmt.Errorf("server exited early")) + return + + case <-obj.exit.Signal(): // exit early on exit signal + ackReady() // must be called + ackExited() // must be called + return + } } - // get initial ideal cluster size - if idealClusterSize, err := GetClusterSize(obj); err == nil { - obj.idealClusterSize = idealClusterSize - log.Printf("Etcd: Startup: Ideal cluster size is: %d", idealClusterSize) - } else { - // perhaps the first server didn't set it yet. it's ok, - // we can get it from the watcher if it ever gets set! - log.Printf("Etcd: Startup: Ideal cluster size lookup error.") + // Connect here. If we're bootstrapping, the server came up + // right above us. No need to add to our endpoints manually, + // that is done for us in the server start method. + if err := obj.connect(); err != nil { + obj.err(errwrap.Wrapf(err, "error during client connect")) + return } - } - - if !obj.noServer { - path := fmt.Sprintf("%s/volunteers/", NS) - go obj.AddWatcher(path, obj.volunteerCallback, true, false, etcd.WithPrefix()) // no block - } - - // if i am alone and will have to be a server... - if !obj.noServer && bootstrapping { - log.Printf("Etcd: Bootstrapping...") - surls := obj.serverURLs - if len(obj.advertiseServerURLs) > 0 { - surls = obj.advertiseServerURLs + obj.client = client.NewClientFromClient(obj.etcd) + obj.client.Debug = obj.Debug + obj.client.Logf = func(format string, v ...interface{}) { + obj.Logf("client: "+format, v...) } - // give an initial value to the obj.nominate map we keep in sync - // this emulates Nominate(obj, obj.hostname, obj.serverURLs) - obj.nominated[obj.hostname] = surls // initial value - // NOTE: when we are stuck waiting for the server to start up, - // it is probably happening on this call right here... - obj.nominateCallback(nil) // kick this off once - } - - // self volunteer - if !obj.noServer && len(obj.serverURLs) > 0 { - // we run this in a go routine because it blocks waiting for server - surls := obj.serverURLs - if len(obj.advertiseServerURLs) > 0 { - surls = obj.advertiseServerURLs + if err := obj.client.Init(); err != nil { + obj.err(errwrap.Wrapf(err, "error during client init")) + return } - log.Printf("Etcd: Startup: Volunteering...") - go Volunteer(obj, surls) - } + // Build a session for making leases that expire on disconnect! + options := []concurrency.SessionOption{ + concurrency.WithTTL(sessionTTLSec(SessionTTL)), + } + if obj.leaseID > 0 { // in the odd chance we ever do reconnects + options = append(options, concurrency.WithLease(obj.leaseID)) + } + obj.session, err = concurrency.NewSession(obj.etcd, options...) + if err != nil { + obj.err(errwrap.Wrapf(err, "could not create session")) + return + } + obj.leaseID = obj.session.Lease() + + obj.Logf("connected!") + if !bootstrapping { // new clients need an initial state sync... + if err := obj.memberStateFromList(startupCtx); err != nil { + obj.err(errwrap.Wrapf(err, "error during initial state sync")) + return + } + } + close(obj.connectSignal) + }() + defer func() { + if obj.session != nil { + obj.session.Close() // this revokes the lease... 
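+			// Revoking the lease means etcd automatically cleans
+			// up any keys that were attached to our leaseID.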
+ } + + // run cleanup functions in reverse order + for i := len(obj.clients) - 1; i >= 0; i-- { + obj.clients[i].Close() // ignore errs + } + if obj.client != nil { // in case we bailed out early + obj.client.Close() // ignore err, but contains wg.Wait() + } + if obj.etcd == nil { // in case we bailed out early + return + } + obj.disconnect() + obj.Logf("disconnected!") + //close(obj.disconnectSignal) + }() + + obj.Logf("watching chooser...") + chooserChan := make(chan error) + obj.activeExit2 = true // activate errExit2 + obj.activateExit2.Ack() + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + defer close(obj.errExit2) // multi-signal for errChan close op + if obj.Chooser == nil { + return + } + + // wait till we're connected + select { + case <-obj.connectSignal: + case <-exitCtx.Done(): + return // run exited early + } + + p := obj.NS + ChooserPath + c, err := obj.MakeClientFromNamespace(p) + if err != nil { + obj.err(errwrap.Wrapf(err, "error during chooser init")) + return + } + if err := obj.Chooser.Connect(exitCtx, c); err != nil { + obj.err(errwrap.Wrapf(err, "error during chooser connect")) + return + } + + ch, err := obj.Chooser.Watch() + if err != nil { + obj.err(errwrap.Wrapf(err, "error running chooser watch")) + return + } + chooserChan = ch // watch it + }() + defer func() { + if obj.Chooser == nil { + return + } + obj.Chooser.Disconnect() // ignore error if any + }() + + // call this once to start the server so we'll be able to connect if bootstrapping { - if err := SetClusterSize(obj, obj.idealClusterSize); err != nil { - log.Printf("Etcd: Startup: Ideal cluster size storage error.") - obj.Destroy() - return fmt.Errorf("Etcd: Startup: Error: %v", err) + obj.Logf("bootstrapping...") + obj.volunteers[obj.Hostname] = surls // bootstrap this! + obj.nominated[obj.Hostname] = surls + // alternatively we can bootstrap like this if we add more stuff... + //data := bootstrapWatcherData(obj.Hostname, surls) // server urls + //if err := obj.nominateApply(data); err != nil { // fake apply + // return err + //} + // server starts inside here if bootstrapping! + if err := obj.nominateCb(startupCtx); err != nil { + // If while bootstrapping a new server, an existing one + // is running on the same port, then we error this here. + return err + } + + // wait till we're connected + select { + case <-obj.connectSignal: + case <-exitCtx.Done(): + // TODO: should this return an error? + return nil // run exited early + } + + // advertise our new endpoint (comes paired after nominateCb) + if err := obj.advertise(startupCtx, obj.Hostname, curls); err != nil { // endpoints + return errwrap.Wrapf(err, "error with endpoints advertise") + } + + // run to add entry into our public nominated datastructure + // FIXME: this might be redundant, but probably not harmful in + // our bootstrapping process... it will get done in volunteerCb + if err := obj.nominate(startupCtx, obj.Hostname, surls); err != nil { + return errwrap.Wrapf(err, "error nominating self") } } - go obj.AddWatcher(fmt.Sprintf("%s/endpoints/", NS), obj.endpointCallback, true, false, etcd.WithPrefix()) + // If obj.NoServer, then we don't need to start up the nominate watcher, + // unless we're the first server... But we check that both are not true! + if bootstrapping || canServer { + if !bootstrapping && canServer { // wait for client! 
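+			// We can't add the nominee watcher until our client
+			// has connected, so block here until that happens.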
+			select {
+			case <-obj.connectSignal:
+			case <-exitCtx.Done():
+				return nil // just exit
+			}
-		if err := obj.Connect(false); err != nil { // don't exit from this Startup function until connected!
+		ctx, cancel := context.WithCancel(unblockCtx)
+		defer cancel()
+		info, err := obj.client.ComplexWatcher(ctx, obj.NS+NominatedPath, etcd.WithPrefix())
+		if err != nil {
+			obj.activateExit3.Ack()
+			return errwrap.Wrapf(err, "error adding nominated watcher")
+		}
+		obj.Logf("watching nominees...")
+		obj.activeExit3 = true // activate errExit3
+		obj.activateExit3.Ack()
+		obj.wg.Add(1)
+		go func() {
+			defer obj.wg.Done()
+			defer close(obj.errExit3) // multi-signal for errChan close op
+			defer cancel()
+			for {
+				var event *interfaces.WatcherData
+				var ok bool
+				select {
+				case event, ok = <-info.Events:
+					if !ok {
+						return
+					}
+				}
+
+				if err := event.Err; err != nil {
+					obj.err(errwrap.Wrapf(err, "nominated watcher errored"))
+					continue
+				}
+
+				// on the initial created event, we populate...
+				if !bootstrapping && event.Created && len(event.Events) == 0 {
+					obj.Logf("populating nominated list...")
+					nominated, err := obj.getNominated(ctx)
+					if err != nil {
+						obj.err(errwrap.Wrapf(err, "get nominated errored"))
+						continue
+					}
+					obj.nominated = nominated
+
+				} else if err := obj.nominateApply(event); err != nil {
+					obj.err(errwrap.Wrapf(err, "nominate apply errored"))
+					continue
+				}
+
+				// decide the desired state before we change it
+				doStop := obj.serverAction(serverActionStop)
+				doStart := obj.serverAction(serverActionStart)
+
+				// server is running, but it should not be
+				if doStop { // stop?
+					// first, unadvertise client urls
+					// TODO: should this cause destroy server instead? does it already?
+					if err := obj.advertise(ctx, obj.Hostname, nil); err != nil { // remove me
+						obj.err(errwrap.Wrapf(err, "error with endpoints unadvertise"))
+						continue
+					}
+				}
+
+				// runServer gets started in a goroutine here...
+				err := obj.nominateCb(ctx)
+				if obj.Debug {
+					obj.Logf("nominateCb: %+v", err)
+				}
+
+				if doStart { // start?
+					if err := obj.advertise(ctx, obj.Hostname, curls); err != nil { // add one
+						obj.err(errwrap.Wrapf(err, "error with endpoints advertise"))
+						continue
+					}
+				}
+
+				if err == interfaces.ErrShutdown {
+					if obj.Debug {
+						obj.Logf("nominated watcher shutdown")
+					}
+					return
+				}
+				if err == nil {
+					continue
+				}
+				obj.err(errwrap.Wrapf(err, "nominated watcher callback errored"))
+				continue
+			}
+		}()
+		defer func() {
+			// wait for unnominate of self to be seen...
+			select {
+			case <-obj.errExit3:
+			case <-obj.hardexit.Signal(): // bork unblocker
+				obj.Logf("unblocked unnominate signal")
+				// now unblock the server in case it's running!
+				if err := obj.destroyServer(); err != nil { // sync until exited
+					obj.err(errwrap.Wrapf(err, "destroyServer errored"))
+					return
+				}
+			}
+		}()
+		defer func() {
+			// wait for volunteer loop to exit
+			select {
+			case <-obj.errExit4:
+			}
+		}()
+	}
+	obj.activateExit3.Ack()
+
+	// volunteering code (volunteer callback and initial volunteering)
+	if !obj.NoServer && len(obj.ServerURLs) > 0 {
+		ctx, cancel := context.WithCancel(unblockCtx)
+		defer cancel() // cleanup on close...
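+		// Volunteering is how a host announces that it is willing to
+		// run a server. Nomination (above) is the cluster choosing from
+		// among the volunteers; this watcher reacts to both volunteer
+		// changes and chooser events.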
+		info, err := obj.client.ComplexWatcher(ctx, obj.NS+VolunteerPath, etcd.WithPrefix())
+		if err != nil {
+			obj.activateExit4.Ack()
+			return errwrap.Wrapf(err, "error adding volunteer watcher")
+		}
+		unvolunteered := make(chan struct{})
+		obj.Logf("watching volunteers...")
+		obj.wg.Add(1)
+		obj.activeExit4 = true // activate errExit4
+		obj.activateExit4.Ack()
+		go func() {
+			defer obj.wg.Done()
+			defer close(obj.errExit4) // multi-signal for errChan close op
+			for {
+				var event *interfaces.WatcherData
+				var ok bool
+				select {
+				case event, ok = <-info.Events:
+					if !ok {
+						return
+					}
+					if err := event.Err; err != nil {
+						obj.err(errwrap.Wrapf(err, "volunteer watcher errored"))
+						continue
+					}
+
+				case chooserEvent, ok := <-chooserChan:
+					if !ok {
+						obj.Logf("got chooser shutdown...")
+						chooserChan = nil // done here!
+						continue
+					}
+					if chooserEvent != nil {
+						obj.err(errwrap.Wrapf(chooserEvent, "chooser watcher errored"))
+						continue
+					}
+					obj.Logf("got chooser event...")
+					event = nil // pass through the apply...
+					// chooser events should poke volunteerCb
+				}
+
+				_, exists1 := obj.volunteers[obj.Hostname] // before
+
+				// on the initial created event, we populate...
+				if !bootstrapping && event != nil && event.Created && len(event.Events) == 0 {
+					obj.Logf("populating volunteers list...")
+					volunteers, err := obj.getVolunteers(ctx)
+					if err != nil {
+						obj.err(errwrap.Wrapf(err, "get volunteers errored"))
+						continue
+					}
+					// TODO: do we need to add ourself?
+					//_, exists := volunteers[obj.Hostname]
+					//if !exists {
+					//	volunteers[obj.Hostname] = surls
+					//}
+					obj.volunteers = volunteers
+
+				} else if err := obj.volunteerApply(event); event != nil && err != nil {
+					obj.err(errwrap.Wrapf(err, "volunteer apply errored"))
+					continue
+				}
+				_, exists2 := obj.volunteers[obj.Hostname] // after
+
+				err := obj.volunteerCb(ctx)
+				if err == nil {
+					// it was there, and it got removed
+					if exists1 && !exists2 {
+						close(unvolunteered)
+					}
+					continue
+				}
+				obj.err(errwrap.Wrapf(err, "volunteer watcher callback errored"))
+				continue
+			}
+		}()
+		defer func() {
+			// wait for unvolunteer of self to be seen...
+			select {
+			case <-unvolunteered:
+			case <-obj.hardexit.Signal(): // bork unblocker
+				obj.Logf("unblocked unvolunteer signal")
+			}
+		}()
+
+		// self volunteer
+		obj.Logf("volunteering...")
+		surls, err := obj.surls()
+		if err != nil {
+			return err
+		}
+		if err := obj.volunteer(ctx, surls); err != nil {
+			return err
+		}
+		defer obj.volunteer(ctx, nil) // unvolunteer
+		defer obj.Logf("unvolunteering...")
+		defer func() {
+			// Move the leader if I'm it, so that the member remove
+			// chooser operation happens on a different member than
+			// myself. A leaving member should not decide its fate.
+			member, err := obj.moveLeaderSomewhere(ctx)
+			if err != nil {
+				// TODO: use obj.err ?
+				obj.Logf("move leader failed with: %+v", err)
+				return
+			}
+			if member != "" {
+				obj.Logf("moved leader to: %s", member)
+			}
+		}()
+	}
+	obj.activateExit4.Ack()
+
+	// startup endpoints watcher (to learn about other servers)
+	ctx, cancel := context.WithCancel(unblockCtx)
+	defer cancel() // cleanup on close...
+	if err := obj.runEndpoints(ctx); err != nil {
+		obj.activateExit5.Ack()
 		return err
 	}
+	obj.activateExit5.Ack()
+	// We don't set state, we only watch others, so nothing to defer close!
+
+	if obj.Converger != nil {
+		obj.Converger.AddStateFn(ConvergerHostnameNamespace, func(converged bool) error {
+			// send our individual state into etcd for others to see
+			// TODO: what should happen on error?
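+			// Rough flow (assumed): each host publishes its own
+			// converged bool under a converged namespace, so that
+			// a --converged-timeout style exit can watch for
+			// cluster-wide convergence.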
+			return obj.setHostnameConverged(exitCtx, obj.Hostname, converged)
+		})
+		defer obj.Converger.RemoveStateFn(ConvergerHostnameNamespace)
+	}
+
+	// NOTE: Add anything else we want to start up here...
+
+	// If we get all the way down here, *and* we're connected, we're ready!
+	obj.wg.Add(1)
+	go func() {
+		defer obj.wg.Done()
+		select {
+		case <-obj.connectSignal:
+			close(obj.readySignal) // we're ready to be used now...
+		case <-exitCtx.Done():
+		}
+	}()
+
+	select {
+	case <-exitCtx.Done():
+	}
+	obj.closing = true // flag to let nominateCb know we're shutting down...
+	// kick off all the defer()'s....
+	return reterr
+}
+
+// runEndpoints runs the endpoints watcher, which keeps our locally cached
+// endpoints list (and the client's endpoint set) in sync with the cluster.
+func (obj *EmbdEtcd) runEndpoints(ctx context.Context) error {
+	bootstrapping := len(obj.Seeds) == 0
+	select {
+	case <-obj.connectSignal:
+	case <-ctx.Done():
+		return nil // TODO: just exit ?
+	}
+	info, err := obj.client.ComplexWatcher(ctx, obj.NS+EndpointsPath, etcd.WithPrefix())
+	if err != nil {
+		obj.activateExit5.Ack()
+		return errwrap.Wrapf(err, "error adding endpoints watcher")
+	}
+	obj.Logf("watching endpoints...")
+	obj.wg.Add(1)
+	obj.activeExit5 = true // activate errExit5
+	obj.activateExit5.Ack()
+	go func() {
+		defer obj.wg.Done()
+		defer close(obj.errExit5) // multi-signal for errChan close op
+		for {
+			var event *interfaces.WatcherData
+			var ok bool
+			select {
+			case event, ok = <-info.Events:
+				if !ok {
+					return
+				}
+				if err := event.Err; err != nil {
+					obj.err(errwrap.Wrapf(err, "endpoints watcher errored"))
+					continue
+				}
+			}
+
+			// on the initial created event, we populate...
+			if !bootstrapping && event.Created && len(event.Events) == 0 {
+				obj.Logf("populating endpoints list...")
+				endpoints, err := obj.getEndpoints(ctx)
+				if err != nil {
+					obj.err(errwrap.Wrapf(err, "get endpoints errored"))
+					continue
+				}
+				obj.endpoints = endpoints
+				obj.setEndpoints()
+
+			} else if err := obj.endpointApply(event); err != nil {
+				obj.err(errwrap.Wrapf(err, "endpoint apply errored"))
+				continue
+			}
+
+			// there is no endpoint callback necessary
+
+			// TODO: do we need this member state sync?
+			if err := obj.memberStateFromList(ctx); err != nil {
+				obj.err(errwrap.Wrapf(err, "error during endpoint state sync"))
+				continue
+			}
+		}
+	}()
+	return nil
 }

 // Destroy cleans up the entire embedded etcd system. Use DestroyServer if you
 // only want to shutdown the embedded server portion.
 func (obj *EmbdEtcd) Destroy() error {
+	obj.Logf("destroy...")
+	obj.exit.Done(nil) // trigger an exit in Run!

-	// this should also trigger an unnominate, which should cause a shutdown
-	log.Printf("Etcd: Destroy: Unvolunteering...")
-	if err := Volunteer(obj, nil); err != nil { // unvolunteer so we can shutdown...
-		log.Printf("Etcd: Destroy: Error: %v", err) // we have a problem
-	}
+	reterr := obj.exit.Error() // wait for exit signal (block until arrival)

-	obj.serverwg.Wait() // wait for server shutdown signal
-
-	obj.exiting = true // must happen before we run the cancel functions!
-
-	// clean up any watchers which might want to continue
-	obj.cancelLock.Lock() // TODO: do we really need the lock here on exit?
- log.Printf("Etcd: Destroy: Cancelling %d operations...", len(obj.cancels)) - for _, cancelFunc := range obj.cancels { - cancelFunc() - } - obj.cancelLock.Unlock() - - close(obj.exitchan) // cause main loop to exit - close(obj.exitchanCb) - - obj.rLock.Lock() - if obj.client != nil { - obj.client.Close() - } - obj.client = nil - obj.rLock.Unlock() - - // this happens in response to the unnominate callback. not needed here! - //if obj.server != nil { - // return obj.DestroyServer() - //} - obj.exitwg.Wait() - return nil + obj.wg.Wait() + return reterr } -// CtxDelayErr requests a retry in Delta duration. -type CtxDelayErr struct { - Delta time.Duration - Message string -} - -func (obj *CtxDelayErr) Error() string { - return fmt.Sprintf("CtxDelayErr(%v): %s", obj.Delta, obj.Message) -} - -// CtxRetriesErr lets you retry as long as you have retries available. -// TODO: consider combining this with CtxDelayErr -type CtxRetriesErr struct { - Retries uint - Message string -} - -func (obj *CtxRetriesErr) Error() string { - return fmt.Sprintf("CtxRetriesErr(%v): %s", obj.Retries, obj.Message) -} - -// CtxPermanentErr is a permanent failure error to notify about borkage. -type CtxPermanentErr struct { - Message string -} - -func (obj *CtxPermanentErr) Error() string { - return fmt.Sprintf("CtxPermanentErr: %s", obj.Message) -} - -// CtxReconnectErr requests a client reconnect to the new endpoint list. -type CtxReconnectErr struct { - Message string -} - -func (obj *CtxReconnectErr) Error() string { - return fmt.Sprintf("CtxReconnectErr: %s", obj.Message) -} - -// CancelCtx adds a tracked cancel function around an existing context. -func (obj *EmbdEtcd) CancelCtx(ctx context.Context) (context.Context, func()) { - cancelCtx, cancelFunc := context.WithCancel(ctx) - obj.cancelLock.Lock() - obj.cancels = append(obj.cancels, cancelFunc) // not thread-safe, needs lock - obj.cancelLock.Unlock() - return cancelCtx, cancelFunc -} - -// TimeoutCtx adds a tracked cancel function with timeout around an existing context. -func (obj *EmbdEtcd) TimeoutCtx(ctx context.Context, t time.Duration) (context.Context, func()) { - timeoutCtx, cancelFunc := context.WithTimeout(ctx, t) - obj.cancelLock.Lock() - obj.cancels = append(obj.cancels, cancelFunc) // not thread-safe, needs lock - obj.cancelLock.Unlock() - return timeoutCtx, cancelFunc -} - -// CtxError is called whenever there is a connection or other client problem -// that needs to be resolved before we can continue, eg: connection disconnected, -// change of server to connect to, etc... It modifies the context if needed. 
-func (obj *EmbdEtcd) CtxError(ctx context.Context, err error) (context.Context, error) { - if obj.ctxErr != nil { // stop on permanent error - return ctx, obj.ctxErr - } - type ctxKey string // use a non-basic type as ctx key (str can conflict) - const ctxErr ctxKey = "ctxErr" - const ctxIter ctxKey = "ctxIter" - expBackoff := func(tmin, texp, iter, tmax int) time.Duration { - // https://en.wikipedia.org/wiki/Exponential_backoff - // tmin <= texp^iter - 1 <= tmax // TODO: check my math - return time.Duration(math.Min(math.Max(math.Pow(float64(texp), float64(iter))-1.0, float64(tmin)), float64(tmax))) * time.Millisecond - } - var isTimeout bool - var iter int // = 0 - if ctxerr, ok := ctx.Value(ctxErr).(error); ok { - if obj.flags.Debug { - log.Printf("Etcd: CtxError: err(%v), ctxerr(%v)", err, ctxerr) - } - if i, ok := ctx.Value(ctxIter).(int); ok { - iter = i + 1 // load and increment - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Iter: %v", iter) - } - } - isTimeout = err == context.DeadlineExceeded - if obj.flags.Debug { - log.Printf("Etcd: CtxError: isTimeout: %v", isTimeout) - } - if !isTimeout { - iter = 0 // reset timer - } - err = ctxerr // restore error - } else if obj.flags.Debug { - log.Printf("Etcd: CtxError: No value found") - } - ctxHelper := func(tmin, texp, tmax int) context.Context { - t := expBackoff(tmin, texp, iter, tmax) - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Timeout: %v", t) - } - - ctxT, _ := obj.TimeoutCtx(ctx, t) - ctxV := context.WithValue(ctxT, ctxIter, iter) // save iter - ctxF := context.WithValue(ctxV, ctxErr, err) // save err - return ctxF - } - _ = ctxHelper // TODO - - isGrpc := func(e error) bool { // helper function - return grpc.ErrorDesc(err) == e.Error() - } - - if err == nil { - log.Fatal("Etcd: CtxError: Error: Unexpected lack of error!") - } - if obj.exiting { - obj.ctxErr = fmt.Errorf("exit in progress") - return ctx, obj.ctxErr - } - - // happens when we trigger the cancels during reconnect - if err == context.Canceled { - // TODO: do we want to create a fresh ctx here for all cancels? - //ctx = context.Background() - ctx, _ = obj.CancelCtx(ctx) // add a new one - return ctx, nil // we should retry, reconnect probably happened - } - - if delayErr, ok := err.(*CtxDelayErr); ok { // custom delay error - log.Printf("Etcd: CtxError: Reason: %s", delayErr.Error()) - time.Sleep(delayErr.Delta) // sleep the amount of time requested - return ctx, nil - } - - if retriesErr, ok := err.(*CtxRetriesErr); ok { // custom retry error - log.Printf("Etcd: CtxError: Reason: %s", retriesErr.Error()) - if retriesErr.Retries == 0 { - obj.ctxErr = fmt.Errorf("no more retries due to CtxRetriesErr") - return ctx, obj.ctxErr - } - return ctx, nil - } - - if permanentErr, ok := err.(*CtxPermanentErr); ok { // custom permanent error - obj.ctxErr = fmt.Errorf("error due to CtxPermanentErr: %s", permanentErr.Error()) - return ctx, obj.ctxErr // quit - } - - if err == etcd.ErrNoAvailableEndpoints { // etcd server is probably starting up - // TODO: tmin, texp, tmax := 500, 2, 16000 // ms, exp base, ms - // TODO: return ctxHelper(tmin, texp, tmax), nil - log.Printf("Etcd: CtxError: No endpoints available yet!") - time.Sleep(500 * time.Millisecond) // a ctx timeout won't help! - return ctx, nil // passthrough - } - - // etcd server is apparently still starting up... 
- if err == rpctypes.ErrNotCapable { // isGrpc(rpctypes.ErrNotCapable) also matches - log.Printf("Etcd: CtxError: Server is starting up...") - time.Sleep(500 * time.Millisecond) // a ctx timeout won't help! - return ctx, nil // passthrough - } - - if err == grpc.ErrClientConnTimeout { // sometimes caused by "too many colons" misconfiguration - return ctx, fmt.Errorf("misconfiguration: %v", err) // permanent failure? - } - - // this can happen if my client connection shuts down, but without any - // available alternatives. in this case, rotate it off to someone else - reconnectErr, isReconnectErr := err.(*CtxReconnectErr) // custom reconnect error - switch { - case isReconnectErr: - log.Printf("Etcd: CtxError: Reason: %s", reconnectErr.Error()) - fallthrough - case err == raft.ErrStopped: // TODO: does this ever happen? - fallthrough - case err == etcdserver.ErrStopped: // TODO: does this ever happen? - fallthrough - case isGrpc(raft.ErrStopped): - fallthrough - case isGrpc(etcdserver.ErrStopped): - fallthrough - case isGrpc(grpc.ErrClientConnClosing): - - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Error(%T): %+v", err, err) - log.Printf("Etcd: Endpoints are: %v", obj.client.Endpoints()) - log.Printf("Etcd: Client endpoints are: %v", obj.endpoints) - } - - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Locking...") - } - obj.rLock.Lock() - // TODO: should this really be nested inside the other lock? - obj.cancelLock.Lock() - // we need to cancel any WIP connections like Txn()'s and so on - // we run the cancel()'s that are stored up so they don't block - log.Printf("Etcd: CtxError: Cancelling %d operations...", len(obj.cancels)) - for _, cancelFunc := range obj.cancels { - cancelFunc() - } - obj.cancels = []func(){} // reset - obj.cancelLock.Unlock() - - log.Printf("Etcd: CtxError: Reconnecting...") - if err := obj.Connect(true); err != nil { - defer obj.rLock.Unlock() - obj.ctxErr = fmt.Errorf("permanent connect error: %v", err) - return ctx, obj.ctxErr - } - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Unlocking...") - } - obj.rLock.Unlock() - log.Printf("Etcd: CtxError: Reconnected!") - return ctx, nil - } - - // FIXME: we might be one of the members in a two member cluster that - // had the other member crash.. hmmm bork?! - if isGrpc(context.DeadlineExceeded) { - log.Printf("Etcd: CtxError: DeadlineExceeded(%T): %+v", err, err) // TODO - } - - if err == rpctypes.ErrDuplicateKey { - log.Fatalf("Etcd: CtxError: Programming error: %+v", err) - } - - // if you hit this code path here, please report the unmatched error! - log.Printf("Etcd: CtxError: Unknown error(%T): %+v", err, err) - time.Sleep(1 * time.Second) - obj.ctxErr = fmt.Errorf("unknown CtxError") - return ctx, obj.ctxErr -} - -// CbLoop is the loop where callback execution is serialized. -func (obj *EmbdEtcd) CbLoop() { - obj.exitwg.Add(1) - defer obj.exitwg.Done() - cuid := obj.converger.Register() - defer cuid.Unregister() - if e := obj.Connect(false); e != nil { - return // fatal - } - var exitTimeout <-chan time.Time // = nil is implied - // we use this timer because when we ignore un-converge events and loop, - // we reset the ConvergedTimer case statement, ruining the timeout math! - cuid.StartTimer() - for { - ctx := context.Background() // TODO: inherit as input argument? +// Interrupt causes this member to force shutdown. It does not safely wait for +// an ordered shutdown. It is not recommended to use this unless you're borked. 
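+// An illustrative use: a signal handler that calls Destroy on the first ^C to
+// shut down cleanly, and that calls Interrupt on a repeat ^C if that hangs.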
+func (obj *EmbdEtcd) Interrupt() error { + obj.Logf("interrupt...") + wg := &sync.WaitGroup{} + var err error + wg.Add(1) + go func() { + defer wg.Done() + err = obj.Destroy() // set return error + }() + wg.Add(1) + go func() { + defer wg.Done() select { - // etcd watcher event - case re := <-obj.wevents: - if !re.skipConv { // if we want to count it... - cuid.ResetTimer() // activity! - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: CbLoop: Event: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - //re.resp.NACK() // nope! - break - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: CbLoop: rawCallback()") - } - err := rawCallback(ctx, re) - if obj.flags.Trace { - log.Printf("Trace: Etcd: CbLoop: rawCallback(): %v", err) - } - if err == nil { - //re.resp.ACK() // success - break - } - re.retries++ // increment error retry count - if ctx, err = obj.CtxError(ctx, err); err != nil { - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: CbLoop: Event: FinishLoop") - } - - // exit loop signal - case <-obj.exitchanCb: - obj.exitchanCb = nil - log.Println("Etcd: Exiting loop shortly...") - // activate exitTimeout switch which only opens after N - // seconds of inactivity in this select switch, which - // lets everything get bled dry to avoid blocking calls - // which would otherwise block us from exiting cleanly! - exitTimeout = util.TimeAfterOrBlock(exitDelay) - - // exit loop commit - case <-exitTimeout: - log.Println("Etcd: Exiting callback loop!") - cuid.StopTimer() // clean up nicely - return + case <-obj.exit.Signal(): // wait for Destroy to run first } - } -} + obj.hardexit.Done(nil) // trigger a hard exit + }() -// Loop is the main loop where everything is serialized. -func (obj *EmbdEtcd) Loop() { - obj.exitwg.Add(1) // TODO: add these to other go routines? - defer obj.exitwg.Done() - cuid := obj.converger.Register() - defer cuid.Unregister() - if e := obj.Connect(false); e != nil { - return // fatal - } - var exitTimeout <-chan time.Time // = nil is implied - cuid.StartTimer() - for { - ctx := context.Background() // TODO: inherit as input argument? - // priority channel... - select { - case aw := <-obj.awq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: PriorityAW: StartLoop") - } - obj.loopProcessAW(ctx, aw) - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: PriorityAW: FinishLoop") - } - continue // loop to drain the priority channel first! - default: - // passthrough to normal channel - } - - select { - // add watcher - case aw := <-obj.awq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: AW: StartLoop") - } - obj.loopProcessAW(ctx, aw) - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: AW: FinishLoop") - } - - // set kv pair - case kv := <-obj.setq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Set: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - kv.resp.NACK() // nope! - break - } - err := obj.rawSet(ctx, kv) - if err == nil { - kv.resp.ACK() // success - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { // try to reconnect, etc... - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Set: FinishLoop") - } - - // get value - case gq := <-obj.getq: - if !gq.skipConv { - cuid.ResetTimer() // activity! 
- } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Get: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - gq.resp.NACK() // nope! - break - } - data, err := obj.rawGet(ctx, gq) - if err == nil { - gq.data = data // update struct - gq.resp.ACK() // success - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Get: FinishLoop") - } - - // delete value - case dl := <-obj.delq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Delete: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - dl.resp.NACK() // nope! - break - } - data, err := obj.rawDelete(ctx, dl) - if err == nil { - dl.data = data // update struct - dl.resp.ACK() // success - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Delete: FinishLoop") - } - - // run txn - case tn := <-obj.txnq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Txn: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - tn.resp.NACK() // nope! - break - } - data, err := obj.rawTxn(ctx, tn) - if err == nil { - tn.data = data // update struct - tn.resp.ACK() // success - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Txn: FinishLoop") - } - - // exit loop signal - case <-obj.exitchan: - obj.exitchan = nil - log.Println("Etcd: Exiting loop shortly...") - // activate exitTimeout switch which only opens after N - // seconds of inactivity in this select switch, which - // lets everything get bled dry to avoid blocking calls - // which would otherwise block us from exiting cleanly! - exitTimeout = util.TimeAfterOrBlock(exitDelay) - - // exit loop commit - case <-exitTimeout: - log.Println("Etcd: Exiting loop!") - cuid.StopTimer() // clean up nicely - return - } - } -} - -// loopProcessAW is a helper function to facilitate creating priority channels! -func (obj *EmbdEtcd) loopProcessAW(ctx context.Context, aw *AW) { - for { - if obj.exiting { // the exit signal has been sent! - aw.resp.NACK() // nope! - return - } - // cancelFunc is our data payload - cancelFunc, err := obj.rawAddWatcher(ctx, aw) - if err == nil { - aw.cancelFunc = cancelFunc // update struct - aw.resp.ACK() // success - return - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return // TODO: do something else ? - } - } -} - -// Set queues up a set operation to occur using our mainloop. -func (obj *EmbdEtcd) Set(key, value string, opts ...etcd.OpOption) error { - resp := event.NewResp() - obj.setq <- &KV{key: key, value: value, opts: opts, resp: resp} - if err := resp.Wait(); err != nil { // wait for ack/nack - return fmt.Errorf("Etcd: Set: Probably received an exit: %v", err) - } - return nil -} - -// rawSet actually implements the key set operation. -func (obj *EmbdEtcd) rawSet(ctx context.Context, kv *KV) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawSet()") - } - // key is the full key path - // TODO: should this be : obj.client.KV.Put or obj.client.Put ? - obj.rLock.RLock() // these read locks need to wrap any use of obj.client - response, err := obj.client.KV.Put(ctx, kv.key, kv.value, kv.opts...) 
- obj.rLock.RUnlock() - log.Printf("Etcd: Set(%s): %v", kv.key, response) // w00t... bonus - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawSet(): %v", err) - } + wg.Wait() return err } -// Get performs a get operation and waits for an ACK to continue. -func (obj *EmbdEtcd) Get(path string, opts ...etcd.OpOption) (map[string]string, error) { - return obj.ComplexGet(path, false, opts...) +// Ready returns a channel that closes when we're up and running. This process +// happens when calling Run. If Run is never called, this will never happen. Our +// main startup must be running, and our client must be connected to get here. +func (obj *EmbdEtcd) Ready() <-chan struct{} { return obj.readySignal } + +// Exited returns a channel that closes when we've destroyed. This process +// happens after Run exits. If Run is never called, this will never happen. +func (obj *EmbdEtcd) Exited() <-chan struct{} { return obj.exitsSignal } + +// config returns the config struct to be used during the etcd client connect. +func (obj *EmbdEtcd) config() etcd.Config { + // FIXME: filter out any urls which wouldn't resolve ? + endpoints := fromURLsMapToStringList(obj.endpoints) // flatten map + // We don't need to do any sort of priority sort here, since for initial + // connect we'd be the only one, so it doesn't matter, and subsequent + // changes are made with SetEndpoints, not here, so we never need to + // prioritize our local endpoint. + sort.Strings(endpoints) // sort for determinism + + if len(endpoints) == 0 { // initially, we need to use the defaults... + for _, u := range obj.Seeds { + endpoints = append(endpoints, u.String()) + } + } + // XXX: connect to our local obj.ClientURLs instead of obj.AClientURLs ? + cfg := etcd.Config{ + Endpoints: endpoints, // eg: []string{"http://254.0.0.1:12345"} + // RetryDialer chooses the next endpoint to use, and comes with + // a default dialer if unspecified. + DialTimeout: ClientDialTimeout, + + // I think the keepalive stuff is needed for endpoint health. + DialKeepAliveTime: ClientDialKeepAliveTime, + DialKeepAliveTimeout: ClientDialKeepAliveTimeout, + + // 0 disables auto-sync. By default auto-sync is disabled. + AutoSyncInterval: 0, // we do something equivalent ourselves + } + return cfg } -// ComplexGet performs a get operation and waits for an ACK to continue. It can -// accept more arguments that are useful for the less common operations. -// TODO: perhaps a get should never cause an un-converge ? -func (obj *EmbdEtcd) ComplexGet(path string, skipConv bool, opts ...etcd.OpOption) (map[string]string, error) { - resp := event.NewResp() - gq := &GQ{path: path, skipConv: skipConv, opts: opts, resp: resp, data: nil} - obj.getq <- gq // send - if err := resp.Wait(); err != nil { // wait for ack/nack - return nil, fmt.Errorf("Etcd: Get: Probably received an exit: %v", err) +// connect connects the client to a server. If we are the first peer, then that +// server is itself. +func (obj *EmbdEtcd) connect() error { + obj.Logf("connect...") + if obj.etcd != nil { + return fmt.Errorf("already connected") } - return gq.data, nil + cfg := obj.config() // get config + var err error + obj.etcd, err = etcd.New(cfg) // connect! + return err } -func (obj *EmbdEtcd) rawGet(ctx context.Context, gq *GQ) (result map[string]string, err error) { - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawGet()") +// disconnect closes the etcd connection. 
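+// It errors if we were never connected in the first place.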
+func (obj *EmbdEtcd) disconnect() error {
+	obj.Logf("disconnect...")
+	if obj.etcd == nil {
+		return fmt.Errorf("already disconnected")
 	}
-	obj.rLock.RLock()
-	// TODO: we're checking if this is nil to workaround a nil ptr bug...
-	if obj.client == nil { // bug?
-		obj.rLock.RUnlock()
-		return nil, fmt.Errorf("client is nil")
-	}
-	if obj.client.KV == nil { // bug?
-		obj.rLock.RUnlock()
-		return nil, fmt.Errorf("client.KV is nil")
-	}
-	response, err := obj.client.KV.Get(ctx, gq.path, gq.opts...)
-	obj.rLock.RUnlock()
-	if err != nil || response == nil {
+
+	return obj.etcd.Close()
+}
+
+// MakeClient returns an etcd Client interface that is suitable for basic tasks.
+// Don't run this until the Ready method has acknowledged.
+func (obj *EmbdEtcd) MakeClient() (interfaces.Client, error) {
+	c := client.NewClientFromClient(obj.etcd)
+	if err := c.Init(); err != nil {
 		return nil, err
 	}
-
-	// TODO: write a response.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
-	result = make(map[string]string)
-	for _, x := range response.Kvs {
-		result[bytes.NewBuffer(x.Key).String()] = bytes.NewBuffer(x.Value).String()
-	}
-
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: rawGet(): %v", result)
-	}
-	return
+	obj.clients = append(obj.clients, c) // make sure to clean up after...
+	return c, nil
 }

-// Delete performs a delete operation and waits for an ACK to continue.
-func (obj *EmbdEtcd) Delete(path string, opts ...etcd.OpOption) (int64, error) {
-	resp := event.NewResp()
-	dl := &DL{path: path, opts: opts, resp: resp, data: -1}
-	obj.delq <- dl // send
-	if err := resp.Wait(); err != nil { // wait for ack/nack
-		return -1, fmt.Errorf("Etcd: Delete: Probably received an exit: %v", err)
+// MakeClientFromNamespace returns an etcd Client interface that is suitable for
+// basic tasks and that has a key namespace prefix. Don't run this until the
+// Ready method has acknowledged.
+func (obj *EmbdEtcd) MakeClientFromNamespace(ns string) (interfaces.Client, error) {
+	kv := namespace.NewKV(obj.etcd.KV, ns)
+	w := namespace.NewWatcher(obj.etcd.Watcher, ns)
+	c := client.NewClientFromNamespace(obj.etcd, kv, w)
+	if err := c.Init(); err != nil {
+		return nil, err
 	}
-	return dl.data, nil
-}
-
-func (obj *EmbdEtcd) rawDelete(ctx context.Context, dl *DL) (count int64, err error) {
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: rawDelete()")
-	}
-	count = -1
-	obj.rLock.RLock()
-	response, err := obj.client.KV.Delete(ctx, dl.path, dl.opts...)
-	obj.rLock.RUnlock()
-	if err == nil {
-		count = response.Deleted
-	}
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: rawDelete(): %v", err)
-	}
-	return
-}
-
-// Txn performs a transaction and waits for an ACK to continue.
-func (obj *EmbdEtcd) Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) { - resp := event.NewResp() - tn := &TN{ifcmps: ifcmps, thenops: thenops, elseops: elseops, resp: resp, data: nil} - obj.txnq <- tn // send - if err := resp.Wait(); err != nil { // wait for ack/nack - return nil, fmt.Errorf("Etcd: Txn: Probably received an exit: %v", err) - } - return tn.data, nil -} - -func (obj *EmbdEtcd) rawTxn(ctx context.Context, tn *TN) (*etcd.TxnResponse, error) { - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawTxn()") - } - obj.rLock.RLock() - response, err := obj.client.KV.Txn(ctx).If(tn.ifcmps...).Then(tn.thenops...).Else(tn.elseops...).Commit() - obj.rLock.RUnlock() - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawTxn(): %v, %v", response, err) - } - return response, err -} - -// AddWatcher queues up an add watcher request and returns a cancel function. -// Remember to add the etcd.WithPrefix() option if you want to watch recursively. -func (obj *EmbdEtcd) AddWatcher(path string, callback func(re *RE) error, errCheck bool, skipConv bool, opts ...etcd.OpOption) (func(), error) { - resp := event.NewResp() - awq := &AW{path: path, opts: opts, callback: callback, errCheck: errCheck, skipConv: skipConv, cancelFunc: nil, resp: resp} - obj.awq <- awq // send - if err := resp.Wait(); err != nil { // wait for ack/nack - return nil, fmt.Errorf("Etcd: AddWatcher: Got NACK: %v", err) - } - return awq.cancelFunc, nil -} - -// rawAddWatcher adds a watcher and returns a cancel function to call to end it. -func (obj *EmbdEtcd) rawAddWatcher(ctx context.Context, aw *AW) (func(), error) { - cancelCtx, cancelFunc := obj.CancelCtx(ctx) - go func(ctx context.Context) { - defer cancelFunc() // it's safe to cancelFunc() more than once! - obj.rLock.RLock() - rch := obj.client.Watcher.Watch(ctx, aw.path, aw.opts...) - obj.rLock.RUnlock() - var rev int64 - var useRev = false - var retry, locked bool = false, false - for { - response := <-rch // read - err := response.Err() - isCanceled := response.Canceled || err == context.Canceled - if response.Header.Revision == 0 { // by inspection - if obj.flags.Debug { - log.Printf("Etcd: Watch: Received empty message!") // switched client connection - } - isCanceled = true - } - - if isCanceled { - if obj.exiting { // if not, it could be reconnect - return - } - err = context.Canceled - } - - if err == nil { // watch from latest good revision - rev = response.Header.Revision // TODO: +1 ? - useRev = true - if !locked { - retry = false - } - locked = false - } else { - if obj.flags.Debug { - log.Printf("Etcd: Watch: Error: %v", err) // probably fixable - } - // this new context is the fix for a tricky set - // of bugs which were encountered when re-using - // the existing canceled context! it has state! - ctx = context.Background() // this is critical! - - if ctx, err = obj.CtxError(ctx, err); err != nil { - return // TODO: it's bad, break or return? - } - - // remake it, but add old Rev when valid - opts := []etcd.OpOption{} - if useRev { - opts = append(opts, etcd.WithRev(rev)) - } - opts = append(opts, aw.opts...) - rch = nil - obj.rLock.RLock() - if obj.client == nil { - defer obj.rLock.RUnlock() - return // we're exiting - } - rch = obj.client.Watcher.Watch(ctx, aw.path, opts...) - obj.rLock.RUnlock() - locked = true - retry = true - continue - } - - // the response includes a list of grouped events, each - // of which includes one Kv struct. Send these all in a - // batched group so that they are processed together... 
- obj.wevents <- &RE{response: response, path: aw.path, err: err, callback: aw.callback, errCheck: aw.errCheck, skipConv: aw.skipConv, retryHint: retry} // send event - } - }(cancelCtx) - return cancelFunc, nil -} - -// rawCallback is the companion to AddWatcher which runs the callback processing. -func rawCallback(ctx context.Context, re *RE) error { - var err = re.err // the watch event itself might have had an error - if err == nil { - if callback := re.callback; callback != nil { - // TODO: we could add an async option if needed - // NOTE: the callback must *not* block! - // FIXME: do we need to pass ctx in via *RE, or in the callback signature ? - err = callback(re) // run the callback - if !re.errCheck || err == nil { - return nil - } - } else { - return nil - } - } - return err -} - -// volunteerCallback runs to respond to the volunteer list change events. -// Functionally, it controls the adding and removing of members. -// FIXME: we might need to respond to member change/disconnect/shutdown events, -// see: https://github.com/coreos/etcd/issues/5277 -func (obj *EmbdEtcd) volunteerCallback(re *RE) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: volunteerCallback()") - defer log.Printf("Trace: Etcd: volunteerCallback(): Finished!") - } - if err := obj.Connect(false); err != nil { - log.Printf("Etcd: volunteerCallback(): Connect failed permanently: %v", err) - // permanently fail... - return &CtxPermanentErr{fmt.Sprintf("Etcd: volunteerCallback(): Connect error: %s", err)} - } - var err error - - // FIXME: if this is running in response to our OWN volunteering offer, - // skip doing stuff if we're not a server yet because it's pointless, - // and we might have just lost quorum if we just got nominated! Why the - // lack of quorum is needed to read data in etcd v3 but not in v2 is a - // mystery for now, since in v3 this now blocks! Maybe it's that the - // Maintenance.Status API requires a leader to return? Maybe that's it! - // FIXME: are there any situations where we don't want to short circuit - // here, such as if i'm the last node? - if obj.server == nil { - return nil // if we're not a server, we're not a leader, return - } - - membersMap, err := Members(obj) // map[uint64]string - if err != nil { - return fmt.Errorf("Etcd: Members: Error: %+v", err) - } - members := util.StrMapValuesUint64(membersMap) // get values - log.Printf("Etcd: Members: List: %+v", members) - - // we only do *one* change operation at a time so that the cluster can - // advance safely. we ensure this by returning CtxDelayErr any time an - // operation happens to ensure the function will reschedule itself due - // to the CtxError processing after this callback "fails". This custom - // error is caught by CtxError, and lets us specify a retry delay too! - - // check for unstarted members, since we're currently "unhealthy" - for mID, name := range membersMap { - if name == "" { - // reschedule in one second - // XXX: will the unnominate TTL still happen if we are - // in an unhealthy state? that's what we're waiting for - return &CtxDelayErr{2 * time.Second, fmt.Sprintf("unstarted member, mID: %d", mID)} - } - } - - leader, err := Leader(obj) // XXX: race! - if err != nil { - log.Printf("Etcd: Leader: Error: %+v", err) - return fmt.Errorf("Etcd: Leader: Error: %+v", err) - } - log.Printf("Etcd: Leader: %+v", leader) - if leader != obj.hostname { - log.Printf("Etcd: We are not the leader...") - return nil - } - // i am the leader! 
- - // get the list of available volunteers - volunteersMap, err := Volunteers(obj) - if err != nil { - log.Printf("Etcd: Volunteers: Error: %+v", err) - return fmt.Errorf("Etcd: Volunteers: Error: %+v", err) - } - - volunteers := []string{} // get keys - for k := range volunteersMap { - volunteers = append(volunteers, k) - } - sort.Strings(volunteers) // deterministic order - log.Printf("Etcd: Volunteers: %v", volunteers) - - // unnominate anyone that unvolunteers, so that they can shutdown cleanly - quitters := util.StrFilterElementsInList(volunteers, members) - log.Printf("Etcd: Quitters: %v", quitters) - - // if we're the only member left, just shutdown... - if len(members) == 1 && members[0] == obj.hostname && len(quitters) == 1 && quitters[0] == obj.hostname { - log.Printf("Etcd: Quitters: Shutting down self...") - if err := Nominate(obj, obj.hostname, nil); err != nil { // unnominate myself - return &CtxDelayErr{1 * time.Second, fmt.Sprintf("error shutting down self: %v", err)} - } - return nil - } - - candidates := util.StrFilterElementsInList(members, volunteers) - log.Printf("Etcd: Candidates: %v", candidates) - - // TODO: switch to < 0 so that we can shut the whole cluster down with 0 - if obj.idealClusterSize < 1 { // safety in case value is not ready yet - return &CtxDelayErr{1 * time.Second, "The idealClusterSize is < 1."} // retry in one second - } - - // do we need more members? - if len(candidates) > 0 && len(members)-len(quitters) < int(obj.idealClusterSize) { - chosen := candidates[0] // XXX: use a better picker algorithm - peerURLs := volunteersMap[chosen] // comma separated list of urls - - // NOTE: storing peerURLs when they're already in volunteers/ is - // redundant, but it seems to be necessary for a sane algorithm. - // nominate before we call the API so that members see it first! - Nominate(obj, chosen, peerURLs) - // XXX: add a ttl here, because once we nominate someone, we - // need to give them up to N seconds to start up after we run - // the MemberAdd API because if they don't, in some situations - // such as if we're adding the second node to the cluster, then - // we've lost quorum until a second member joins! If the TTL - // expires, we need to MemberRemove! In this special case, we - // need to forcefully remove the second member if we don't add - // them, because we'll be in a lack of quorum state and unable - // to do anything... As a result, we should always only add ONE - // member at a time! - - log.Printf("Etcd: Member Add: %v", peerURLs) - mresp, err := MemberAdd(obj, peerURLs) - if err != nil { - // on error this function will run again, which is good - // because we need to make sure to run the below parts! - return fmt.Errorf("Etcd: Member Add: Error: %+v", err) - } - log.Printf("Etcd: Member Add: %+v", mresp.Member.PeerURLs) - // return and reschedule to check for unstarted members, etc... - return &CtxDelayErr{1 * time.Second, fmt.Sprintf("Member %s added successfully!", chosen)} // retry asap - - } else if len(quitters) == 0 && len(members) > int(obj.idealClusterSize) { // too many members - for _, kicked := range members { - // don't kick ourself unless we are the only one left... 
- if kicked != obj.hostname || (obj.idealClusterSize == 0 && len(members) == 1) { - quitters = []string{kicked} // XXX: use a better picker algorithm - log.Printf("Etcd: Extras: %v", quitters) - break - } - } - } - - // we must remove them from the members API or it will look like a crash - if lq := len(quitters); lq > 0 { - log.Printf("Etcd: Quitters: Shutting down %d members...", lq) - } - for _, quitter := range quitters { - mID, ok := util.Uint64KeyFromStrInMap(quitter, membersMap) - if !ok { - // programming error - log.Fatalf("Etcd: Member Remove: Error: %v(%v) not in members list!", quitter, mID) - } - Nominate(obj, quitter, nil) // unnominate - // once we issue the above unnominate, that peer will - // shutdown, and this might cause us to loose quorum, - // therefore, let that member remove itself, and then - // double check that it did happen in case delinquent - // TODO: get built-in transactional member Add/Remove - // functionality to avoid a separate nominate list... - if quitter == obj.hostname { // remove in unnominate! - log.Printf("Etcd: Quitters: Removing self...") - continue // TODO: CtxDelayErr ? - } - - log.Printf("Etcd: Waiting %d seconds for %s to self remove...", selfRemoveTimeout, quitter) - time.Sleep(selfRemoveTimeout * time.Second) - // in case the removed member doesn't remove itself, do it! - removed, err := MemberRemove(obj, mID) - if err != nil { - return fmt.Errorf("Etcd: Member Remove: Error: %+v", err) - } - if removed { - log.Printf("Etcd: Member Removed (forced): %v(%v)", quitter, mID) - } - - // Remove the endpoint from our list to avoid blocking - // future MemberList calls which would try and connect - // to a missing endpoint... The endpoints should get - // updated from the member exiting safely if it doesn't - // crash, but if it did and/or since it's a race to see - // if the update event will get seen before we need the - // new data, just do it now anyways, then update the - // endpoint list and trigger a reconnect. - delete(obj.endpoints, quitter) // proactively delete it - obj.endpointCallback(nil) // update! - log.Printf("Member %s (%d) removed successfully!", quitter, mID) - return &CtxReconnectErr{"a member was removed"} // retry asap and update endpoint list - } - - return nil -} - -// nominateCallback runs to respond to the nomination list change events. -// Functionally, it controls the starting and stopping of the server process. -func (obj *EmbdEtcd) nominateCallback(re *RE) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: nominateCallback()") - defer log.Printf("Trace: Etcd: nominateCallback(): Finished!") - } - bootstrapping := len(obj.endpoints) == 0 - var revision int64 // = 0 - if re != nil { - revision = re.response.Header.Revision - } - if !bootstrapping && (re == nil || revision != obj.lastRevision) { - // don't reprocess if we've already processed this message - // this can happen if the callback errors and is re-called - obj.lastRevision = revision - - // if we tried to lookup the nominated members here (in etcd v3) - // this would sometimes block because we would loose the cluster - // leader once the current leader calls the MemberAdd API and it - // steps down trying to form a two host cluster. Instead, we can - // look at the event response data to read the nominated values! 
- //nominated, err = Nominated(obj) // nope, won't always work - // since we only see what has *changed* in the response data, we - // have to keep track of the original state and apply the deltas - // this must be idempotent in case it errors and is called again - // if we're retrying and we get a data format error, it's normal - nominated := obj.nominated - if nominated, err := ApplyDeltaEvents(re, nominated); err == nil { - obj.nominated = nominated - } else if !re.retryHint || err != errApplyDeltaEventsInconsistent { - log.Fatal(err) - } - - } else { - // TODO: should we just use the above delta method for everything? - //nominated, err := Nominated(obj) // just get it - //if err != nil { - // return fmt.Errorf("Etcd: Nominate: Error: %+v", err) - //} - //obj.nominated = nominated // update our local copy - } - if n := obj.nominated; len(n) > 0 { - log.Printf("Etcd: Nominated: %+v", n) - } else { - log.Printf("Etcd: Nominated: []") - } - - // if there are no other peers, we create a new server - _, exists := obj.nominated[obj.hostname] - // FIXME: can we get rid of the len(obj.nominated) == 0 ? - newCluster := len(obj.nominated) == 0 || (len(obj.nominated) == 1 && exists) - if obj.flags.Debug { - log.Printf("Etcd: nominateCallback(): newCluster: %v; exists: %v; obj.server == nil: %t", newCluster, exists, obj.server == nil) - } - // XXX: check if i have actually volunteered first of all... - if obj.server == nil && (newCluster || exists) { - - log.Printf("Etcd: StartServer(newCluster: %t): %+v", newCluster, obj.nominated) - err := obj.StartServer( - newCluster, // newCluster - obj.nominated, // other peer members and urls or empty map - ) - if err != nil { - var retries uint - if re != nil { - retries = re.retries - } - // retry MaxStartServerRetries times, then permanently fail - return &CtxRetriesErr{MaxStartServerRetries - retries, fmt.Sprintf("Etcd: StartServer: Error: %+v", err)} - } - - if len(obj.endpoints) == 0 { - // add server to obj.endpoints list... - addresses := obj.LocalhostClientURLs() - if len(addresses) == 0 { - // probably a programming error... - log.Fatal("Etcd: No valid clientUrls exist!") - } - obj.endpoints[obj.hostname] = addresses // now we have some! - // client connects to one of the obj.endpoints servers... - log.Printf("Etcd: Addresses are: %s", addresses) - - surls := obj.serverURLs - if len(obj.advertiseServerURLs) > 0 { - surls = obj.advertiseServerURLs - } - // XXX: just put this wherever for now so we don't block - // nominate self so "member" list is correct for peers to see - Nominate(obj, obj.hostname, surls) - // XXX: if this fails, where will we retry this part ? - } - - // advertise client urls - if curls := obj.clientURLs; len(curls) > 0 { - if len(obj.advertiseClientURLs) > 0 { - curls = obj.advertiseClientURLs - } - // XXX: don't advertise local addresses! 127.0.0.1:2381 doesn't really help remote hosts - // XXX: but sometimes this is what we want... hmmm how do we decide? filter on callback? - AdvertiseEndpoints(obj, curls) - // XXX: if this fails, where will we retry this part ? - - // force this to remove sentinel before we reconnect... - obj.endpointCallback(nil) - } - - return &CtxReconnectErr{"local server is running"} // trigger reconnect to self - - } else if obj.server != nil && !exists { - // un advertise client urls - AdvertiseEndpoints(obj, nil) - - // i have been un-nominated, remove self and shutdown server! - if len(obj.nominated) != 0 { // don't call if nobody left but me! 
- // this works around: https://github.com/coreos/etcd/issues/5482, - // and it probably makes sense to avoid calling if we're the last - log.Printf("Etcd: Member Remove: Removing self: %v", obj.memberID) - removed, err := MemberRemove(obj, obj.memberID) - if err != nil { - return fmt.Errorf("Etcd: Member Remove: Error: %+v", err) - } - if removed { - log.Printf("Etcd: Member Removed (self): %v(%v)", obj.hostname, obj.memberID) - } - } - - log.Printf("Etcd: DestroyServer...") - obj.DestroyServer() - // TODO: make sure to think about the implications of - // shutting down and potentially intercepting signals - // here after i've removed myself from the nominated! - - // if we are connected to self and other servers exist: trigger - // if any of the obj.clientURLs are in the endpoints list, then - // we are stale. it is not likely that the advertised endpoints - // have been updated because we're still blocking the callback. - stale := false - for key, eps := range obj.endpoints { - if key != obj.hostname && len(eps) > 0 { // other endpoints? - stale = true // only half true so far - break - } - } - - for _, curl := range obj.clientURLs { // these just got shutdown - for _, ep := range obj.client.Endpoints() { - if (curl.Host == ep || curl.String() == ep) && stale { - // add back the sentinel to force update - log.Printf("Etcd: Forcing endpoint callback...") - obj.endpoints[seedSentinel] = nil //etcdtypes.URLs{} - obj.endpointCallback(nil) // update! - return &CtxReconnectErr{"local server has shutdown"} // trigger reconnect - } - } - } - } - return nil -} - -// endpointCallback runs to respond to the endpoint list change events. -func (obj *EmbdEtcd) endpointCallback(re *RE) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: endpointCallback()") - defer log.Printf("Trace: Etcd: endpointCallback(): Finished!") - } - - // if the startup sentinel exists, or delta fails, then get a fresh copy - endpoints := make(etcdtypes.URLsMap, len(obj.endpoints)) - // this would copy the reference: endpoints := obj.endpoints - for k, v := range obj.endpoints { - endpoints[k] = make(etcdtypes.URLs, len(v)) - copy(endpoints[k], v) - } - - // updating - _, exists := endpoints[seedSentinel] - endpoints, err := ApplyDeltaEvents(re, endpoints) - if err != nil || exists { - // TODO: we could also lookup endpoints from the maintenance api - endpoints, err = Endpoints(obj) - if err != nil { - return err - } - } - - // change detection - var changed = false // do we need to update? - if len(obj.endpoints) != len(endpoints) { - changed = true - } - for k, v1 := range obj.endpoints { - if changed { // catches previous statement and inner loop break - break - } - v2, exists := endpoints[k] - if !exists { - changed = true - break - } - if len(v1) != len(v2) { - changed = true - break - } - for i := range v1 { - if v1[i] != v2[i] { - changed = true - break - } - } - } - // is the endpoint list different? - if changed { - obj.endpoints = endpoints // set - if eps := endpoints; len(eps) > 0 { - log.Printf("Etcd: Endpoints: %+v", eps) - } else { - log.Printf("Etcd: Endpoints: []") - } - // can happen if a server drops out for example - return &CtxReconnectErr{"endpoint list changed"} // trigger reconnect with new endpoint list - } - - return nil -} - -// idealClusterSizeCallback runs to respond to the ideal cluster size changes. 
-func (obj *EmbdEtcd) idealClusterSizeCallback(re *RE) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: idealClusterSizeCallback()") - defer log.Printf("Trace: Etcd: idealClusterSizeCallback(): Finished!") - } - path := fmt.Sprintf("%s/idealClusterSize", NS) - for _, event := range re.response.Events { - if key := bytes.NewBuffer(event.Kv.Key).String(); key != path { - continue - } - if event.Type != etcd.EventTypePut { - continue - } - val := bytes.NewBuffer(event.Kv.Value).String() - if val == "" { - continue - } - v, err := strconv.ParseUint(val, 10, 16) - if err != nil { - continue - } - if i := uint16(v); i > 0 { - log.Printf("Etcd: Ideal cluster size is now: %d", i) - obj.idealClusterSize = i - // now, emulate the calling of the volunteerCallback... - go func() { - obj.wevents <- &RE{callback: obj.volunteerCallback, errCheck: true} // send event - }() // don't block - } - } - return nil -} - -// LocalhostClientURLs returns the most localhost like URLs for direct connection. -// This gets clients to talk to the local servers first before searching remotely. -func (obj *EmbdEtcd) LocalhostClientURLs() etcdtypes.URLs { - // look through obj.clientURLs and return the localhost ones - urls := etcdtypes.URLs{} - for _, x := range obj.clientURLs { - // "localhost", ::1 or anything in 127.0.0.0/8 is valid! - if s := x.Host; strings.HasPrefix(s, "localhost") || strings.HasPrefix(s, "127.") || strings.HasPrefix(s, "[::1]") { - urls = append(urls, x) - } - // or local unix domain socket - if x.Scheme == "unix" { - urls = append(urls, x) - } - } - return urls -} - -// StartServer kicks of a new embedded etcd server. -func (obj *EmbdEtcd) StartServer(newCluster bool, peerURLsMap etcdtypes.URLsMap) error { - var err error - memberName := obj.hostname - - err = os.MkdirAll(obj.dataDir, 0770) - if err != nil { - log.Printf("Etcd: StartServer: Couldn't mkdir: %s.", obj.dataDir) - log.Printf("Etcd: StartServer: Mkdir error: %s.", err) - obj.DestroyServer() - return err - } - - // if no peer URLs exist, then starting a server is mostly only for some - // testing, but etcd doesn't allow the value to be empty so we use this! - peerURLs, _ := etcdtypes.NewURLs([]string{"http://localhost:0"}) - if len(obj.serverURLs) > 0 { - peerURLs = obj.serverURLs - } - initialPeerURLsMap := make(etcdtypes.URLsMap) - for k, v := range peerURLsMap { - initialPeerURLsMap[k] = v // copy - } - if _, exists := peerURLsMap[memberName]; !exists { - initialPeerURLsMap[memberName] = peerURLs - } - - aCUrls := obj.clientURLs - if len(obj.advertiseClientURLs) > 0 { - aCUrls = obj.advertiseClientURLs - } - aPUrls := peerURLs - if len(obj.advertiseServerURLs) > 0 { - aPUrls = obj.advertiseServerURLs - } - - // embed etcd - cfg := embed.NewConfig() - cfg.Name = memberName // hostname - cfg.Dir = obj.dataDir - cfg.LCUrls = obj.clientURLs - cfg.LPUrls = peerURLs - cfg.ACUrls = aCUrls - cfg.APUrls = aPUrls - cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305 - cfg.MaxTxnOps = DefaultMaxTxnOps - - cfg.InitialCluster = initialPeerURLsMap.String() // including myself! - if newCluster { - cfg.ClusterState = embed.ClusterStateFlagNew - } else { - cfg.ClusterState = embed.ClusterStateFlagExisting - } - //cfg.ForceNewCluster = newCluster // TODO: ? 
- - log.Printf("Etcd: StartServer: Starting server...") - obj.server, err = embed.StartEtcd(cfg) - if err != nil { - return err - } - select { - case <-obj.server.Server.ReadyNotify(): // we hang here if things are bad - log.Printf("Etcd: StartServer: Done starting server!") // it didn't hang! - case <-time.After(time.Duration(MaxStartServerTimeout) * time.Second): - e := fmt.Errorf("timeout of %d seconds reached", MaxStartServerTimeout) - log.Printf("Etcd: StartServer: %s", e.Error()) - obj.server.Server.Stop() // trigger a shutdown - obj.serverwg.Add(1) // add for the DestroyServer() - obj.DestroyServer() - return e - // TODO: should we wait for this notification elsewhere? - case <-obj.server.Server.StopNotify(): // it's going down now... - e := fmt.Errorf("received stop notification") - log.Printf("Etcd: StartServer: %s", e.Error()) - obj.server.Server.Stop() // trigger a shutdown - obj.serverwg.Add(1) // add for the DestroyServer() - obj.DestroyServer() - return e - } - //log.Fatal(<-obj.server.Err()) XXX - log.Printf("Etcd: StartServer: Server running...") - obj.memberID = uint64(obj.server.Server.ID()) // store member id for internal use - close(obj.serverReady) // send a signal - - obj.serverwg.Add(1) - return nil -} - -// ServerReady returns on a channel when the server has started successfully. -func (obj *EmbdEtcd) ServerReady() <-chan struct{} { return obj.serverReady } - -// DestroyServer shuts down the embedded etcd server portion. -func (obj *EmbdEtcd) DestroyServer() error { - var err error - log.Printf("Etcd: DestroyServer: Destroying...") - if obj.server != nil { - obj.server.Close() // this blocks until server has stopped - } - log.Printf("Etcd: DestroyServer: Done closing...") - - obj.memberID = 0 - if obj.server == nil { // skip the .Done() below because we didn't .Add(1) it. - return err - } - obj.server = nil // important because this is used as an isRunning flag - log.Printf("Etcd: DestroyServer: Unlocking server...") - obj.serverReady = make(chan struct{}) // reset the signal - obj.serverwg.Done() // -1 - return err -} - -//func UrlRemoveScheme(urls etcdtypes.URLs) []string { -// strs := []string{} -// for _, u := range urls { -// strs = append(strs, u.Host) // remove http:// prefix -// } -// return strs -//} - -// ApplyDeltaEvents modifies a URLsMap with the deltas from a WatchResponse. 
-func ApplyDeltaEvents(re *RE, urlsmap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) { - if re == nil { // passthrough - return urlsmap, nil - } - for _, event := range re.response.Events { - key := bytes.NewBuffer(event.Kv.Key).String() - key = key[len(re.path):] // remove path prefix - log.Printf("Etcd: ApplyDeltaEvents: Event(%s): %s", event.Type.String(), key) - - switch event.Type { - case etcd.EventTypePut: - val := bytes.NewBuffer(event.Kv.Value).String() - if val == "" { - return nil, fmt.Errorf("value in ApplyDeltaEvents is empty") - } - urls, err := etcdtypes.NewURLs(strings.Split(val, ",")) - if err != nil { - return nil, fmt.Errorf("format error in ApplyDeltaEvents: %v", err) - } - urlsmap[key] = urls // add to map - - // expiry cases are seen as delete in v3 for now - //case etcd.EventTypeExpire: // doesn't exist right now - // fallthrough - case etcd.EventTypeDelete: - if _, exists := urlsmap[key]; !exists { - // this can happen if we retry an operation b/w - // a reconnect so ignore if we are reconnecting - log.Printf("Etcd: ApplyDeltaEvents: Inconsistent key: %v", key) - return nil, errApplyDeltaEventsInconsistent - } - delete(urlsmap, key) - - default: - return nil, fmt.Errorf("unknown event in ApplyDeltaEvents: %+v", event.Type) - } - } - return urlsmap, nil + obj.clients = append(obj.clients, c) // make sure to clean up after... + return c, nil } diff --git a/etcd/etcd_test.go b/etcd/etcd_test.go index 8a794461..30b41b0d 100644 --- a/etcd/etcd_test.go +++ b/etcd/etcd_test.go @@ -21,31 +21,19 @@ package etcd import ( "testing" - - etcdtypes "github.com/coreos/etcd/pkg/types" ) -func TestNewEmbdEtcd(t *testing.T) { - // should return a new etcd object - - noServer := false - var flags Flags - - obj := NewEmbdEtcd("", nil, nil, nil, nil, nil, noServer, false, 0, flags, "", nil) - if obj == nil { - t.Fatal("failed to create server object") - } -} - -func TestNewEmbdEtcdConfigValidation(t *testing.T) { - // running --no-server with no --seeds specified should fail early - - seeds := make(etcdtypes.URLs, 0) - noServer := true - var flags Flags - - obj := NewEmbdEtcd("", seeds, nil, nil, nil, nil, noServer, false, 0, flags, "", nil) - if obj != nil { - t.Fatal("server initialization should fail on invalid configuration") +func TestValidation1(t *testing.T) { + // running --no-server with no --seeds should not validate at the moment + embdEtcd := &EmbdEtcd{ + //Seeds: etcdtypes.URLs{}, + NoServer: true, + } + if err := embdEtcd.Validate(); err == nil { + t.Errorf("expected validation err, got nil") + } + if err := embdEtcd.Init(); err == nil { + t.Errorf("expected init err, got nil") + defer embdEtcd.Close() } } diff --git a/etcd/event/event.go b/etcd/event/event.go deleted file mode 100644 index 4601d2c3..00000000 --- a/etcd/event/event.go +++ /dev/null @@ -1,70 +0,0 @@ -// Mgmt -// Copyright (C) 2013-2019+ James Shubin and the project contributors -// Written by James Shubin and the project contributors -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -// Package event provides some primitives that are used for message passing. -package event - -import ( - "fmt" -) - -// Resp is a channel to be used for boolean responses. A nil represents an ACK, -// and a non-nil represents a NACK (false). This also lets us use custom errors. -type Resp chan error - -// NewResp is just a helper to return the right type of response channel. -func NewResp() Resp { - resp := make(chan error) - return resp -} - -// ACK sends a true value to resp. -func (resp Resp) ACK() { - if resp != nil { - resp <- nil // TODO: close instead? - } -} - -// NACK sends a false value to resp. -func (resp Resp) NACK() { - if resp != nil { - resp <- fmt.Errorf("NACK") - } -} - -// ACKNACK sends a custom ACK or NACK. The ACK value is always nil, the NACK can -// be any non-nil error value. -func (resp Resp) ACKNACK(err error) { - if resp != nil { - resp <- err - } -} - -// Wait waits for any response from a Resp channel and returns it. -func (resp Resp) Wait() error { - return <-resp -} - -// ACKWait waits for a +ive Ack from a Resp channel. -func (resp Resp) ACKWait() { - for { - // wait until true value - if resp.Wait() == nil { - return - } - } -} diff --git a/etcd/fs/file.go b/etcd/fs/file.go index 22d48d3a..0ef637bc 100644 --- a/etcd/fs/file.go +++ b/etcd/fs/file.go @@ -22,7 +22,6 @@ import ( "encoding/gob" "fmt" "io" - "log" "os" "path" "strings" @@ -32,6 +31,7 @@ import ( "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" // "clientv3" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" ) func init() { @@ -263,10 +263,8 @@ func (obj *File) Sync() error { p := obj.path() // store file data at this path in etcd - // TODO: use https://github.com/coreos/etcd/pull/7417 if merged - cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing - //cmp := etcd.KeyMissing(p)) - + //cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing + cmp := etcdutil.KeyMissing(p) op := etcd.OpPut(p, string(obj.data)) // this pushes contents to server // it's important to do this in one transaction, and atomically, because @@ -277,7 +275,7 @@ func (obj *File) Sync() error { } if !result.Succeeded { if obj.fs.Debug { - log.Printf("debug: data already exists in storage") + obj.fs.Logf("debug: data already exists in storage") } } diff --git a/etcd/fs/fs.go b/etcd/fs/fs.go index 2ece4389..1f9a4dd1 100644 --- a/etcd/fs/fs.go +++ b/etcd/fs/fs.go @@ -20,6 +20,7 @@ package fs import ( "bytes" + "context" "crypto/sha256" "encoding/gob" "encoding/hex" @@ -27,19 +28,18 @@ import ( "fmt" "hash" "io" - "log" "os" "path" "strings" "syscall" "time" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" // "clientv3" rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" "github.com/spf13/afero" - context "golang.org/x/net/context" ) func init() { @@ -91,7 +91,7 @@ var ( // XXX: this is harder because we need to list of *all* metadata paths, if we // want them to be able to share storage backends. 
(we do) type Fs struct { - Client *etcd.Client + Client interfaces.Client Metadata string // location of "superblock" for this filesystem @@ -99,6 +99,7 @@ type Fs struct { Hash string // eg: sha256 Debug bool + Logf func(format string, v ...interface{}) sb *superBlock mounted bool @@ -115,7 +116,7 @@ type superBlock struct { // NewEtcdFs creates a new filesystem handle on an etcd client connection. You // must specify the metadata string that you wish to use. -func NewEtcdFs(client *etcd.Client, metadata string) afero.Fs { +func NewEtcdFs(client interfaces.Client, metadata string) afero.Fs { return &Fs{ Client: client, Metadata: metadata, @@ -127,23 +128,26 @@ func (obj *Fs) get(path string, opts ...etcd.OpOption) (map[string][]byte, error ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout) resp, err := obj.Client.Get(ctx, path, opts...) cancel() - if err != nil || resp == nil { + if err != nil { return nil, err } + if resp == nil { + return nil, fmt.Errorf("empty response") + } - // TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse - result := make(map[string][]byte) // formerly: map[string][]byte - for _, x := range resp.Kvs { - result[string(x.Key)] = x.Value // formerly: bytes.NewBuffer(x.Value).String() + // FIXME: just return resp instead if it was map[string]string? + result := make(map[string][]byte) + for key, val := range resp { + result[key] = []byte(val) // wasteful transform } return result, nil } // put a value into etcd. -func (obj *Fs) put(path string, data []byte, opts ...etcd.OpOption) error { +func (obj *Fs) set(path string, data []byte, opts ...etcd.OpOption) error { ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout) - _, err := obj.Client.Put(ctx, path, string(data), opts...) // TODO: obj.Client.KV ? + err := obj.Client.Set(ctx, path, string(data), opts...) cancel() if err != nil { switch err { @@ -163,7 +167,7 @@ func (obj *Fs) put(path string, data []byte, opts ...etcd.OpOption) error { // txn runs a txn in etcd. func (obj *Fs) txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) { ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout) - resp, err := obj.Client.Txn(ctx).If(ifcmps...).Then(thenops...).Else(elseops...).Commit() + resp, err := obj.Client.Txn(ctx, ifcmps, thenops, elseops) cancel() return resp, err } @@ -194,7 +198,7 @@ func (obj *Fs) sync() error { return errwrap.Wrapf(err, "gob failed to encode") } //base64.StdEncoding.EncodeToString(b.Bytes()) - return obj.put(obj.Metadata, b.Bytes()) + return obj.set(obj.Metadata, b.Bytes()) } // mount downloads the initial cache of metadata, including the *file tree. 
@@ -213,7 +217,7 @@ func (obj *Fs) mount() error { } if result == nil || len(result) == 0 { // nothing found, create the fs if obj.Debug { - log.Printf("debug: mount: creating new fs at: %s", obj.Metadata) + obj.Logf("mount: creating new fs at: %s", obj.Metadata) } // trim any trailing slashes from DataPrefix for strings.HasSuffix(obj.DataPrefix, "/") { @@ -248,7 +252,7 @@ func (obj *Fs) mount() error { } if obj.Debug { - log.Printf("debug: mount: opening old fs at: %s", obj.Metadata) + obj.Logf("mount: opening old fs at: %s", obj.Metadata) } sb, exists := result[obj.Metadata] if !exists { diff --git a/etcd/fs/fs_test.go b/etcd/fs/fs_test.go index fd864063..41885215 100644 --- a/etcd/fs/fs_test.go +++ b/etcd/fs/fs_test.go @@ -26,7 +26,7 @@ import ( "syscall" "testing" - "github.com/purpleidea/mgmt/etcd" + "github.com/purpleidea/mgmt/etcd/client" etcdfs "github.com/purpleidea/mgmt/etcd/fs" "github.com/purpleidea/mgmt/integration" "github.com/purpleidea/mgmt/util" @@ -41,6 +41,7 @@ import ( const ( umask = 0666 superblock = "/some/superblock" // TODO: generate randomly per test? + ns = "/_mgmt/test" // must not end with a slash! ) // Ensure that etcdfs.Fs implements afero.Fs. @@ -79,20 +80,26 @@ func TestFs1(t *testing.T) { } defer stopEtcd() // ignore the error - etcdClient := &etcd.ClientEtcd{ - Seeds: []string{"localhost:2379"}, // endpoints + logf := func(format string, v ...interface{}) { + t.Logf("test: etcd: fs: "+format, v...) } + etcdClient := client.NewClientFromSeedsNamespace( + []string{"localhost:2379"}, // endpoints + ns, + ) - if err := etcdClient.Connect(); err != nil { + if err := etcdClient.Init(); err != nil { t.Errorf("client connection error: %+v", err) return } - defer etcdClient.Destroy() + defer etcdClient.Close() etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, Metadata: superblock, DataPrefix: etcdfs.DefaultDataPrefix, + + Logf: logf, } //var etcdFs afero.Fs = NewEtcdFs() @@ -193,20 +200,26 @@ func TestFs2(t *testing.T) { } defer stopEtcd() // ignore the error - etcdClient := &etcd.ClientEtcd{ - Seeds: []string{"localhost:2379"}, // endpoints + logf := func(format string, v ...interface{}) { + t.Logf("test: etcd: fs: "+format, v...) } + etcdClient := client.NewClientFromSeedsNamespace( + []string{"localhost:2379"}, // endpoints + ns, + ) - if err := etcdClient.Connect(); err != nil { + if err := etcdClient.Init(); err != nil { t.Errorf("client connection error: %+v", err) return } - defer etcdClient.Destroy() + defer etcdClient.Close() etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, Metadata: superblock, DataPrefix: etcdfs.DefaultDataPrefix, + + Logf: logf, } tree, err := util.FsTree(etcdFs, "/") @@ -246,20 +259,26 @@ func TestFs3(t *testing.T) { } defer stopEtcd() // ignore the error - etcdClient := &etcd.ClientEtcd{ - Seeds: []string{"localhost:2379"}, // endpoints + logf := func(format string, v ...interface{}) { + t.Logf("test: etcd: fs: "+format, v...) 
} + etcdClient := client.NewClientFromSeedsNamespace( + []string{"localhost:2379"}, // endpoints + ns, + ) - if err := etcdClient.Connect(); err != nil { + if err := etcdClient.Init(); err != nil { t.Errorf("client connection error: %+v", err) return } - defer etcdClient.Destroy() + defer etcdClient.Close() etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, Metadata: superblock, DataPrefix: etcdfs.DefaultDataPrefix, + + Logf: logf, } if err := etcdFs.Mkdir("/tmp", umask); err != nil { @@ -371,18 +390,19 @@ func TestEtcdCopyFs0(t *testing.T) { } defer stopEtcd() // ignore the error - etcdClient := &etcd.ClientEtcd{ - Seeds: []string{"localhost:2379"}, // endpoints - } + etcdClient := client.NewClientFromSeedsNamespace( + []string{"localhost:2379"}, // endpoints + ns, + ) - if err := etcdClient.Connect(); err != nil { + if err := etcdClient.Init(); err != nil { t.Errorf("client connection error: %+v", err) return } - defer etcdClient.Destroy() + defer etcdClient.Close() etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, Metadata: superblock, DataPrefix: etcdfs.DefaultDataPrefix, } diff --git a/etcd/helpers.go b/etcd/helpers.go new file mode 100644 index 00000000..38ee43fc --- /dev/null +++ b/etcd/helpers.go @@ -0,0 +1,160 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "context" + "fmt" + "sort" + "strings" + + "github.com/purpleidea/mgmt/etcd/interfaces" + "github.com/purpleidea/mgmt/util" + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" + "github.com/coreos/etcd/mvcc/mvccpb" + etcdtypes "github.com/coreos/etcd/pkg/types" // generated package +) + +// setEndpoints sets the endpoints on the etcd client if it exists. It +// prioritizes local endpoints for performance, and so that if a remote endpoint +// disconnects we aren't affected. +func (obj *EmbdEtcd) setEndpoints() { + if obj.etcd == nil { // if client doesn't exist, skip! + return + } + + eps := fromURLsMapToStringList(obj.endpoints) // get flat list + sort.Strings(eps) // sort for determinism + + curls, _ := obj.curls() // ignore error, was already validated + + // prio sort so we connect locally first + urls := fromURLsToStringList(curls) + headFn := func(x string) bool { + return !util.StrInList(x, urls) + } + eps = util.PriorityStrSliceSort(eps, headFn) + if obj.Debug { + obj.Logf("set endpoints to: %+v", eps) + } + // trigger reconnect with new endpoint list + // XXX: When a client switches endpoints, do the watches continue from + // where they last were or do they restart? Add rev restart if needed. + obj.etcd.SetEndpoints(eps...) // no error to check +} + +// ConnectBlock runs a command as soon as the client is connected. 
When the
+// command completes, it closes the output channel. If any error occurred, it
+// is sent on that channel before it closes.
+func (obj *EmbdEtcd) ConnectBlock(ctx context.Context, fn func(context.Context) error) <-chan error {
+	ch := make(chan error)
+	obj.wg.Add(1)
+	go func() {
+		defer obj.wg.Done()
+		defer close(ch)
+		select {
+		case <-obj.connectSignal: // the client is connected!
+		case <-ctx.Done():
+			return
+		}
+		if fn == nil {
+			return
+		}
+		if err := fn(ctx); err != nil {
+			select {
+			case ch <- err:
+			case <-ctx.Done():
+			}
+		}
+	}()
+	return ch
+}
+
+// bootstrapWatcherData returns a minimal WatcherData struct to simulate an
+// initial event for bootstrapping the nominateCb before we've started up.
+func bootstrapWatcherData(hostname string, urls etcdtypes.URLs) *interfaces.WatcherData {
+	return &interfaces.WatcherData{
+		Created: true, // add this flag to hint that we're bootstrapping
+
+		Header: pb.ResponseHeader{}, // not needed
+		Events: []*etcd.Event{
+			{
+				Type: mvccpb.PUT, // or mvccpb.DELETE
+				Kv: &mvccpb.KeyValue{
+					Key:   []byte(hostname),
+					Value: []byte(urls.String()),
+				},
+			},
+		},
+	}
+}
+
+// applyDeltaEvents applies the WatchResponse deltas to a URLsMap and returns a
+// modified copy. A nil data is treated as a passthrough.
+func applyDeltaEvents(data *interfaces.WatcherData, urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
+	out, err := copyURLsMap(urlsMap)
+	if err != nil {
+		return nil, err
+	}
+	if data == nil { // passthrough (check this before reading data.Err!)
+		return out, nil
+	}
+	if err := data.Err; err != nil {
+		return nil, errwrap.Wrapf(err, "data contains an error")
+	}
+	var reterr error
+	for _, event := range data.Events {
+		key := string(event.Kv.Key)
+		key = key[len(data.Path):] // remove path prefix
+		//obj.Logf("applyDeltaEvents: Event(%s): %s", event.Type.String(), key)
+
+		switch event.Type {
+		case etcd.EventTypePut:
+			val := string(event.Kv.Value)
+			if val == "" {
+				return nil, fmt.Errorf("value is empty")
+			}
+			urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
+			if err != nil {
+				return nil, errwrap.Wrapf(err, "format error")
+			}
+			out[key] = urls // add to the copy
+
+		// expiry cases are seen as delete in v3 for now
+		//case etcd.EventTypeExpire: // doesn't exist right now
+		//	fallthrough
+		case etcd.EventTypeDelete:
+			if _, exists := out[key]; exists {
+				delete(out, key)
+				continue
+			}
+
+			// this can happen if we retry an operation between a
+			// reconnect, so ignore in case we are reconnecting...
+			reterr = errInconsistentApply // key not found
+			// keep applying in case this is ignored
+
+		default:
+			return nil, fmt.Errorf("unknown event: %v", event.Type)
+		}
+	}
+	return out, reterr
+}
diff --git a/etcd/interfaces/client.go b/etcd/interfaces/client.go
new file mode 100644
index 00000000..8d15e1e8
--- /dev/null
+++ b/etcd/interfaces/client.go
@@ -0,0 +1,63 @@
+// Mgmt
+// Copyright (C) 2013-2019+ James Shubin and the project contributors
+// Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package interfaces + +import ( + "context" + + etcd "github.com/coreos/etcd/clientv3" // "clientv3" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" +) + +// WatcherData is the structure of data passed to a callback from any watcher. +type WatcherData struct { + // Created is true if this event is the initial event sent on startup. + Created bool + + // XXX: what goes here... this? or a more processed version? + Path string // the path we're watching + Header pb.ResponseHeader + Events []*etcd.Event + Err error +} + +// WatcherInfo is what is returned from a Watcher. It contains everything you +// might need to get information about the running watch. +type WatcherInfo struct { + // Cancel must be called to shutdown the Watcher when we are done with + // it. You can alternatively call cancel on the input ctx. + Cancel func() + + // Events returns a channel of any events that occur. This happens on + // watch startup, watch event, and watch failure. This channel closes + // when the Watcher shuts down. If you block on these reads, then you + // will block the entire Watcher which is usually not what you want. + Events <-chan *WatcherData +} + +// Client provides a simple interface specification for client requests. Both +// EmbdEtcd.MakeClient and client.Simple implement this. +type Client interface { + GetClient() *etcd.Client + Set(ctx context.Context, key, value string, opts ...etcd.OpOption) error + Get(ctx context.Context, path string, opts ...etcd.OpOption) (map[string]string, error) + Del(ctx context.Context, path string, opts ...etcd.OpOption) (int64, error) + Txn(ctx context.Context, ifCmps []etcd.Cmp, thenOps, elseOps []etcd.Op) (*etcd.TxnResponse, error) + Watcher(ctx context.Context, path string, opts ...etcd.OpOption) (chan error, error) + ComplexWatcher(ctx context.Context, path string, opts ...etcd.OpOption) (*WatcherInfo, error) +} diff --git a/etcd/interfaces/error.go b/etcd/interfaces/error.go new file mode 100644 index 00000000..71d2c997 --- /dev/null +++ b/etcd/interfaces/error.go @@ -0,0 +1,33 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package interfaces + +// Error is a constant error type that implements error. +type Error string + +// Error fulfills the error interface of this type. +func (e Error) Error() string { return string(e) } + +const ( + // ErrNotExist is returned when GetStr or friends can not find the + // requested key. + ErrNotExist = Error("ErrNotExist") + + // ErrShutdown is returned when we're exiting during a shutdown. 
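+	// Since Error is a plain constant string type, callers can compare
+	// against it directly, e.g. err == ErrShutdown.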
+ ErrShutdown = Error("ErrShutdown") +) diff --git a/etcd/membership.go b/etcd/membership.go new file mode 100644 index 00000000..7a7bcfa1 --- /dev/null +++ b/etcd/membership.go @@ -0,0 +1,314 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "context" + "fmt" + "net/url" + "sort" + "time" + + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" + rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" + etcdtypes "github.com/coreos/etcd/pkg/types" +) + +// addSelfState is used to populate the initial state when I am adding myself. +func (obj *EmbdEtcd) addSelfState() { + surls, _ := obj.surls() // validated on init + curls, _ := obj.curls() // validated on init + obj.membermap[obj.Hostname] = surls + obj.endpoints[obj.Hostname] = curls + obj.memberIDs[obj.Hostname] = obj.serverID +} + +// addMemberState adds the specific member state to our local caches. +func (obj *EmbdEtcd) addMemberState(member string, id uint64, surls, curls etcdtypes.URLs) { + obj.stateMutex.Lock() + defer obj.stateMutex.Unlock() + if surls != nil { + obj.membermap[member] = surls + } + if curls != nil { // TODO: && len(curls) > 0 ? + obj.endpoints[member] = curls + } + obj.memberIDs[member] = id +} + +// rmMemberState removes the state of a given member. +func (obj *EmbdEtcd) rmMemberState(member string) { + obj.stateMutex.Lock() + defer obj.stateMutex.Unlock() + delete(obj.membermap, member) // proactively delete it + delete(obj.endpoints, member) // proactively delete it + delete(obj.memberIDs, member) // proactively delete it +} + +// updateMemberState updates some of our local state whenever we get new +// information from a response. +// TODO: ideally this would be []*etcd.Member but the types are inconsistent... +// TODO: is it worth computing a delta to see if we need to change this? 
+func (obj *EmbdEtcd) updateMemberState(members []*pb.Member) error {
+	//nominated := make(etcdtypes.URLsMap)
+	//volunteers := make(etcdtypes.URLsMap)
+	membermap := make(etcdtypes.URLsMap) // map[hostname]URLs
+	endpoints := make(etcdtypes.URLsMap) // map[hostname]URLs
+	memberIDs := make(map[string]uint64) // map[hostname]memberID
+
+	// URLs is etcdtypes.URLs is []url.URL
+	for _, member := range members {
+		// member.ID // uint64
+		// member.Name // string (hostname)
+		// member.PeerURLs // []string (URLs)
+		// member.ClientURLs // []string (URLs)
+
+		if member.Name == "" { // not started yet
+			continue
+		}
+
+		// []string -> etcdtypes.URLs
+		purls, err := etcdtypes.NewURLs(member.PeerURLs)
+		if err != nil {
+			return err
+		}
+		curls, err := etcdtypes.NewURLs(member.ClientURLs)
+		if err != nil {
+			return err
+		}
+
+		//nominated[member.Name] = member.PeerURLs
+		//volunteers[member.Name] = member.PeerURLs
+		membermap[member.Name] = purls
+		endpoints[member.Name] = curls
+		memberIDs[member.Name] = member.ID
+	}
+
+	// set
+	obj.stateMutex.Lock()
+	defer obj.stateMutex.Unlock()
+	// can't set these two, because we only have a partial knowledge of them
+	//obj.nominated = nominated // can't get this information (partial)
+	//obj.volunteers = volunteers // can't get this information (partial)
+	obj.membermap = membermap
+	obj.endpoints = endpoints
+	obj.memberIDs = memberIDs
+
+	return nil
+}
+
+// memberList returns the current list of server peer members in the cluster.
+func (obj *EmbdEtcd) memberList(ctx context.Context) (*etcd.MemberListResponse, error) {
+	return obj.etcd.MemberList(ctx)
+}
+
+// memberAdd adds a member to the cluster.
+func (obj *EmbdEtcd) memberAdd(ctx context.Context, peerURLs etcdtypes.URLs) (*etcd.MemberAddResponse, error) {
+	resp, err := obj.etcd.MemberAdd(ctx, peerURLs.StringSlice())
+	if err == rpctypes.ErrPeerURLExist { // commonly seen at startup
+		return nil, nil
+	}
+	if err == rpctypes.ErrMemberExist { // not seen yet, but plan for it
+		return nil, nil
+	}
+	return resp, err
+}
+
+// memberRemove removes a member by ID. It can return a nil response with a nil
+// error: that happens when the member was already gone, since the call can run
+// without error even though the member wasn't found. If a value of zero is
+// used, then it will try to remove itself in an idempotent way, by using our
+// own server ID, which is only set when we're actually running a server.
+func (obj *EmbdEtcd) memberRemove(ctx context.Context, memberID uint64) (*etcd.MemberRemoveResponse, error) {
+	if memberID == 0 {
+		// copy value to avoid it changing part way through
+		memberID = obj.serverID
+	}
+	if memberID == 0 {
+		return nil, fmt.Errorf("can't remove memberID of zero")
+	}
+
+	resp, err := obj.etcd.MemberRemove(ctx, memberID)
+	if err == rpctypes.ErrMemberNotFound {
+		// if we get this, member already shut itself down :)
+		return nil, nil // unchanged, mask this error
+	}
+
+	return resp, err // changed
+}
+
+// memberChange polls the member list API and runs a function on each iteration.
+// If that function returns nil, then it closes the output channel to signal an
+// event. Between iterations, it sleeps for a given interval. Since this polls
+// and doesn't watch events, it could miss changes if they happen rapidly. It
+// does not send results on the channel, since results could be captured in the
+// fn callback. It will send an error on the channel if something goes wrong.
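+//
+// A hedged usage sketch (illustrative only, not part of this change): a caller
+// that polls until the cluster reports at least three started members.
+//
+//	fn := func(members []*pb.Member) error {
+//		if len(members) >= 3 {
+//			return nil // done: this closes the channel
+//		}
+//		return fmt.Errorf("only %d members so far", len(members))
+//	}
+//	ch, err := obj.memberChange(ctx, fn, 5*time.Second)
+//	if err != nil {
+//		return err
+//	}
+//	if err, ok := <-ch; ok && err != nil {
+//		return err // the member list API errored
+//	}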
+// TODO: https://github.com/coreos/etcd/issues/5277 +func (obj *EmbdEtcd) memberChange(ctx context.Context, fn func([]*pb.Member) error, d time.Duration) (chan error, error) { + ch := make(chan error) + go func() { + defer close(ch) + for { + resp, err := obj.etcd.MemberList(ctx) + if err != nil { + select { + case ch <- err: // send error + case <-ctx.Done(): + } + return + } + result := fn(resp.Members) + if result == nil { // done! + return + } + select { + case <-time.After(d): // sleep before retry + // pass + case <-ctx.Done(): + return + } + } + }() + return ch, nil +} + +// memberStateFromList does a member list, and applies the state to our cache. +func (obj *EmbdEtcd) memberStateFromList(ctx context.Context) error { + resp, err := obj.memberList(ctx) + if err != nil { + return err + } + if resp == nil { + return fmt.Errorf("empty response") + } + reterr := obj.updateMemberState(resp.Members) + if reterr == nil { + obj.setEndpoints() // sync client with new endpoints + } + return reterr +} + +// isLeader returns true if I'm the leader from the first sane perspective (pov) +// that I can arbitrarily pick. +func (obj *EmbdEtcd) isLeader(ctx context.Context) (bool, error) { + if obj.server == nil { + return false, nil // if i'm not a server, i'm not a leader, return + } + + var ep, backup *url.URL + if len(obj.ClientURLs) > 0 { + // heuristic, but probably correct + addresses := localhostURLs(obj.ClientURLs) + if len(addresses) > 0 { + ep = &addresses[0] // arbitrarily pick the first one + } + backup = &obj.ClientURLs[0] // backup + } + if ep == nil && len(obj.AClientURLs) > 0 { + addresses := localhostURLs(obj.AClientURLs) + if len(addresses) > 0 { + ep = &addresses[0] + } + backup = &obj.AClientURLs[0] // backup + } + if ep == nil { + ep = backup + } + if ep == nil { // programming error? + return false, fmt.Errorf("no available endpoints") + } + + // Ask for one perspective... + // TODO: are we supposed to use ep.Host instead? + resp, err := obj.etcd.Maintenance.Status(ctx, ep.String()) // this perspective + if err != nil { + return false, err + } + if resp == nil { + return false, fmt.Errorf("empty response") + } + if resp.Leader != obj.serverID { // i am not the leader + return false, nil + } + + return true, nil +} + +// moveLeaderSomewhere tries to transfer the leader to the alphanumerically +// lowest member if the caller is the current leader. This contains races. If it +// succeeds, it returns the member hostname that it transferred to. If it can't +// transfer, but doesn't error, it returns an empty string. Any error condition +// returns an error. +func (obj *EmbdEtcd) moveLeaderSomewhere(ctx context.Context) (string, error) { + //if isLeader, err := obj.isLeader(ctx); err != nil { // race! + // return "", errwrap.Wrapf(err, "error determining leader") + //} else if !isLeader { + // if obj.Debug { + // obj.Logf("we are not the leader...") + // } + // return "", nil + //} + // assume i am the leader! + + memberList, err := obj.memberList(ctx) + if err != nil { + return "", err + } + + var transfereeID uint64 + m := make(map[string]uint64) + names := []string{} + for _, x := range memberList.Members { + m[x.Name] = x.ID + if x.Name != obj.Hostname { + names = append(names, x.Name) + } + } + if len(names) == 0 { + return "", nil // can't transfer to self, last remaining host + } + if len(names) == 1 && names[0] == obj.Hostname { // does this happen? 
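+		// NOTE: names is built above by skipping our own hostname, so
+		// this branch should be unreachable; keep it as a safety net.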
+ return "", nil // can't transfer to self + } + sort.Strings(names) + if len(names) > 0 { + // transfer to alphanumerically lowest ID for consistency... + transfereeID = m[names[0]] + } + + if transfereeID == 0 { // safety + return "", fmt.Errorf("got memberID of zero") + } + if transfereeID == obj.serverID { + return "", nil // can't transfer to self + } + + // do the move + if _, err := obj.etcd.MoveLeader(ctx, transfereeID); err == rpctypes.ErrNotLeader { + if obj.Debug { + obj.Logf("we are not the leader...") + } + return "", nil // we are not the leader + } else if err != nil { + return "", errwrap.Wrapf(err, "error moving leader") + } + return names[0], nil +} diff --git a/etcd/methods.go b/etcd/methods.go index 3563f4f8..94ecf65d 100644 --- a/etcd/methods.go +++ b/etcd/methods.go @@ -18,394 +18,220 @@ package etcd import ( + "context" "fmt" - "log" - "strconv" "strings" + "github.com/purpleidea/mgmt/util/errwrap" + etcd "github.com/coreos/etcd/clientv3" - rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" etcdtypes "github.com/coreos/etcd/pkg/types" - context "golang.org/x/net/context" ) -// TODO: Could all these Etcd*(obj *EmbdEtcd, ...) functions which deal with the -// interface between etcd paths and behaviour be grouped into a single struct ? - -// Nominate nominates a particular client to be a server (peer). -func Nominate(obj *EmbdEtcd, hostname string, urls etcdtypes.URLs) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: Nominate(%v): %v", hostname, urls.String()) - defer log.Printf("Trace: Etcd: Nominate(%v): Finished!", hostname) - } - // nominate someone to be a server - nominate := fmt.Sprintf("%s/nominated/%s", NS, hostname) - ops := []etcd.Op{} // list of ops in this txn - if urls != nil { - ops = append(ops, etcd.OpPut(nominate, urls.String())) // TODO: add a TTL? (etcd.WithLease) - - } else { // delete message if set to erase - ops = append(ops, etcd.OpDelete(nominate)) - } - - if _, err := obj.Txn(nil, ops, nil); err != nil { - return fmt.Errorf("nominate failed") // exit in progress? - } - return nil -} - -// Nominated returns a urls map of nominated etcd server volunteers. -// NOTE: I know 'nominees' might be more correct, but is less consistent here -func Nominated(obj *EmbdEtcd) (etcdtypes.URLsMap, error) { - path := fmt.Sprintf("%s/nominated/", NS) - keyMap, err := obj.Get(path, etcd.WithPrefix()) // map[string]string, bool - if err != nil { - return nil, fmt.Errorf("nominated isn't available: %v", err) - } - nominated := make(etcdtypes.URLsMap) - for key, val := range keyMap { // loop through directory of nominated - if !strings.HasPrefix(key, path) { - continue +// volunteer offers yourself up to be a server if needed. If you specify a nil +// value for urls, then this will unvolunteer yourself. 
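+//
+// A hedged usage sketch (illustrative only; it assumes surls returns our
+// already validated server URLs, as it does elsewhere in this package):
+//
+//	urls, err := obj.surls()
+//	if err != nil {
+//		return err
+//	}
+//	if err := obj.volunteer(ctx, urls); err != nil { // offer to serve
+//		return err
+//	}
+//	defer obj.volunteer(ctx, nil) // unvolunteer on the way out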
+func (obj *EmbdEtcd) volunteer(ctx context.Context, urls etcdtypes.URLs) error {
+	if obj.Debug {
+		if urls == nil {
+			obj.Logf("unvolunteer...")
+			defer obj.Logf("unvolunteer: done!")
+		} else {
+			obj.Logf("volunteer: %s", urls.String())
+			defer obj.Logf("volunteer: done!")
 		}
-		name := key[len(path):] // get name of nominee
-		if val == "" { // skip "erased" values
-			continue
-		}
-		urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
-		if err != nil {
-			return nil, fmt.Errorf("nominated data format error: %v", err)
-		}
-		nominated[name] = urls // add to map
-		if obj.flags.Debug {
-			log.Printf("Etcd: Nominated(%v): %v", name, val)
-		}
-	}
-	return nominated, nil
-}
-
-// Volunteer offers yourself up to be a server if needed.
-func Volunteer(obj *EmbdEtcd, urls etcdtypes.URLs) error {
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: Volunteer(%v): %v", obj.hostname, urls.String())
-		defer log.Printf("Trace: Etcd: Volunteer(%v): Finished!", obj.hostname)
 	}
 	// volunteer to be a server
-	volunteer := fmt.Sprintf("%s/volunteers/%s", NS, obj.hostname)
-	ops := []etcd.Op{} // list of ops in this txn
+	key := fmt.Sprintf(obj.NS+volunteerPathFmt, obj.Hostname)
+	ifs := []etcd.Cmp{} // list matching the desired state
+	ops := []etcd.Op{}  // list of ops in this txn
+	els := []etcd.Op{}
 	if urls != nil {
-		// XXX: adding a TTL is crucial! (i think)
-		ops = append(ops, etcd.OpPut(volunteer, urls.String())) // value is usually a peer "serverURL"
+		data := urls.String() // value is usually a peer "serverURL"
+		// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
+		// XXX: reverse things with els to workaround the bug :(
+		//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
+		//ops = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
+		ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
+		ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
+		els = append(els, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
 	} else { // delete message if set to erase
-		ops = append(ops, etcd.OpDelete(volunteer))
+		ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
+		ops = append(ops, etcd.OpDelete(key))
 	}
 
-	if _, err := obj.Txn(nil, ops, nil); err != nil {
-		return fmt.Errorf("volunteering failed") // exit in progress?
+	_, err := obj.client.Txn(ctx, ifs, ops, els)
+	msg := "volunteering failed"
+	if urls == nil {
+		msg = "unvolunteering failed"
 	}
-	return nil
+	return errwrap.Wrapf(err, msg)
 }
 
-// Volunteers returns a urls map of available etcd server volunteers.
-func Volunteers(obj *EmbdEtcd) (etcdtypes.URLsMap, error) {
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: Volunteers()")
-		defer log.Printf("Trace: Etcd: Volunteers(): Finished!")
+// nominate nominates a particular client to be a server (peer). If you specify
+// a nil value for urls, then this will unnominate that member.
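+//
+// Like volunteer above and advertise below, this compiles to a single atomic
+// etcd transaction of the form If(cmps...) Then(ops...) Else(els...), so
+// repeated calls with identical urls are idempotent. An illustrative call:
+//
+//	urls, _ := etcdtypes.NewURLs([]string{"http://192.168.13.42:2380"})
+//	if err := etcdObj.nominate(ctx, "h2", urls); err != nil { // nominate h2
+//		return err
+//	}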
+func (obj *EmbdEtcd) nominate(ctx context.Context, hostname string, urls etcdtypes.URLs) error {
+	if obj.Debug {
+		if urls == nil {
+			obj.Logf("unnominate(%s)...", hostname)
+			defer obj.Logf("unnominate(%s): done!", hostname)
+		} else {
+			obj.Logf("nominate(%s): %s", hostname, urls.String())
+			defer obj.Logf("nominate(%s): done!", hostname)
+		}
 	}
-	path := fmt.Sprintf("%s/volunteers/", NS)
-	keyMap, err := obj.Get(path, etcd.WithPrefix())
+	// nominate someone to be a server
+	key := fmt.Sprintf(obj.NS+nominatedPathFmt, hostname)
+	ifs := []etcd.Cmp{} // list matching the desired state
+	ops := []etcd.Op{}  // list of ops in this txn
+	els := []etcd.Op{}
+	if urls != nil {
+		data := urls.String()
+		// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
+		// XXX: reverse things with els to workaround the bug :(
+		//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
+		//ops = append(ops, etcd.OpPut(key, data)) // TODO: add a TTL? (etcd.WithLease)
+		ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
+		els = append(els, etcd.OpPut(key, data)) // TODO: add a TTL? (etcd.WithLease)
+
+	} else { // delete message if set to erase
+		ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
+		ops = append(ops, etcd.OpDelete(key))
+	}
+
+	_, err := obj.client.Txn(ctx, ifs, ops, els)
+	msg := "nominate failed"
+	if urls == nil {
+		msg = "unnominate failed"
+	}
+	return errwrap.Wrapf(err, msg)
+}
+
+// advertise idempotently advertises the list of available client endpoints for
+// the given member. If you specify a nil value for urls, then this will remove
+// that member.
+func (obj *EmbdEtcd) advertise(ctx context.Context, hostname string, urls etcdtypes.URLs) error {
+	if obj.Debug {
+		if urls == nil {
+			obj.Logf("unadvertise(%s)...", hostname)
+			defer obj.Logf("unadvertise(%s): done!", hostname)
+		} else {
+			obj.Logf("advertise(%s): %s", hostname, urls.String())
+			defer obj.Logf("advertise(%s): done!", hostname)
+		}
+	}
+	// advertise endpoints
+	key := fmt.Sprintf(obj.NS+endpointsPathFmt, hostname)
+	ifs := []etcd.Cmp{} // list matching the desired state
+	ops := []etcd.Op{}  // list of ops in this txn
+	els := []etcd.Op{}
+	if urls != nil {
+		data := urls.String() // value is usually a "clientURL"
+		// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
+		// XXX: reverse things with els to workaround the bug :(
+		//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
+		//ops = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
+		ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
+		ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
+		els = append(els, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
+	} else { // delete in this case
+		ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
+		ops = append(ops, etcd.OpDelete(key))
+	}
+
+	_, err := obj.client.Txn(ctx, ifs, ops, els)
+	msg := "advertising failed"
+	if urls == nil {
+		msg = "unadvertising failed"
+	}
+	return errwrap.Wrapf(err, msg)
+}
+
+// getVolunteers returns a urls map of available etcd server volunteers.
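+//
+// The returned map is keyed by hostname, with the peer URLs as values, eg
+// (illustrative values only):
+//
+//	map[h1:[http://127.0.0.1:2380] h2:[http://127.0.0.1:2382]]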
+func (obj *EmbdEtcd) getVolunteers(ctx context.Context) (etcdtypes.URLsMap, error) { + if obj.Debug { + obj.Logf("getVolunteers()") + defer obj.Logf("getVolunteers(): done!") + } + p := obj.NS + VolunteerPath + keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix()) if err != nil { - return nil, fmt.Errorf("volunteers aren't available: %v", err) + return nil, errwrap.Wrapf(err, "can't get peer volunteers") } volunteers := make(etcdtypes.URLsMap) for key, val := range keyMap { // loop through directory of volunteers - if !strings.HasPrefix(key, path) { + if !strings.HasPrefix(key, p) { continue } - name := key[len(path):] // get name of volunteer - if val == "" { // skip "erased" values + name := key[len(p):] // get name of volunteer + if val == "" { // skip "erased" values continue } urls, err := etcdtypes.NewURLs(strings.Split(val, ",")) if err != nil { - return nil, fmt.Errorf("volunteers data format error: %v", err) + return nil, errwrap.Wrapf(err, "data format error") } volunteers[name] = urls // add to map - if obj.flags.Debug { - log.Printf("Etcd: Volunteer(%v): %v", name, val) - } } return volunteers, nil } -// AdvertiseEndpoints advertises the list of available client endpoints. -func AdvertiseEndpoints(obj *EmbdEtcd, urls etcdtypes.URLs) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: AdvertiseEndpoints(%v): %v", obj.hostname, urls.String()) - defer log.Printf("Trace: Etcd: AdvertiseEndpoints(%v): Finished!", obj.hostname) +// getNominated returns a urls map of nominated etcd server volunteers. +// NOTE: I know 'nominees' might be more correct, but is less consistent here +func (obj *EmbdEtcd) getNominated(ctx context.Context) (etcdtypes.URLsMap, error) { + if obj.Debug { + obj.Logf("getNominated()") + defer obj.Logf("getNominated(): done!") } - // advertise endpoints - endpoints := fmt.Sprintf("%s/endpoints/%s", NS, obj.hostname) - ops := []etcd.Op{} // list of ops in this txn - if urls != nil { - // TODO: add a TTL? (etcd.WithLease) - ops = append(ops, etcd.OpPut(endpoints, urls.String())) // value is usually a "clientURL" - - } else { // delete message if set to erase - ops = append(ops, etcd.OpDelete(endpoints)) - } - - if _, err := obj.Txn(nil, ops, nil); err != nil { - return fmt.Errorf("endpoint advertising failed") // exit in progress? - } - return nil -} - -// Endpoints returns a urls map of available etcd server endpoints. 
-func Endpoints(obj *EmbdEtcd) (etcdtypes.URLsMap, error) { - if obj.flags.Trace { - log.Printf("Trace: Etcd: Endpoints()") - defer log.Printf("Trace: Etcd: Endpoints(): Finished!") - } - path := fmt.Sprintf("%s/endpoints/", NS) - keyMap, err := obj.Get(path, etcd.WithPrefix()) + p := obj.NS + NominatedPath + keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix()) // map[string]string, bool if err != nil { - return nil, fmt.Errorf("endpoints aren't available: %v", err) + return nil, errwrap.Wrapf(err, "can't get nominated peers") } - endpoints := make(etcdtypes.URLsMap) - for key, val := range keyMap { // loop through directory of endpoints - if !strings.HasPrefix(key, path) { + nominated := make(etcdtypes.URLsMap) + for key, val := range keyMap { // loop through directory of nominated + if !strings.HasPrefix(key, p) { continue } - name := key[len(path):] // get name of volunteer - if val == "" { // skip "erased" values + name := key[len(p):] // get name of nominee + if val == "" { // skip "erased" values continue } urls, err := etcdtypes.NewURLs(strings.Split(val, ",")) if err != nil { - return nil, fmt.Errorf("endpoints data format error: %v", err) + return nil, errwrap.Wrapf(err, "data format error") + } + nominated[name] = urls // add to map + } + return nominated, nil +} + +// getEndpoints returns a urls map of available endpoints for clients. +func (obj *EmbdEtcd) getEndpoints(ctx context.Context) (etcdtypes.URLsMap, error) { + if obj.Debug { + obj.Logf("getEndpoints()") + defer obj.Logf("getEndpoints(): done!") + } + p := obj.NS + EndpointsPath + keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix()) + if err != nil { + return nil, errwrap.Wrapf(err, "can't get client endpoints") + } + endpoints := make(etcdtypes.URLsMap) + for key, val := range keyMap { // loop through directory of endpoints + if !strings.HasPrefix(key, p) { + continue + } + name := key[len(p):] // get name of volunteer + if val == "" { // skip "erased" values + continue + } + urls, err := etcdtypes.NewURLs(strings.Split(val, ",")) + if err != nil { + return nil, errwrap.Wrapf(err, "data format error") } endpoints[name] = urls // add to map - if obj.flags.Debug { - log.Printf("Etcd: Endpoint(%v): %v", name, val) - } } return endpoints, nil } - -// SetHostnameConverged sets whether a specific hostname is converged. -func SetHostnameConverged(obj *EmbdEtcd, hostname string, isConverged bool) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: SetHostnameConverged(%s): %v", hostname, isConverged) - defer log.Printf("Trace: Etcd: SetHostnameConverged(%v): Finished!", hostname) - } - converged := fmt.Sprintf("%s/converged/%s", NS, hostname) - op := []etcd.Op{etcd.OpPut(converged, fmt.Sprintf("%t", isConverged))} - if _, err := obj.Txn(nil, op, nil); err != nil { // TODO: do we need a skipConv flag here too? - return fmt.Errorf("set converged failed") // exit in progress? - } - return nil -} - -// HostnameConverged returns a map of every hostname's converged state. -func HostnameConverged(obj *EmbdEtcd) (map[string]bool, error) { - if obj.flags.Trace { - log.Printf("Trace: Etcd: HostnameConverged()") - defer log.Printf("Trace: Etcd: HostnameConverged(): Finished!") - } - path := fmt.Sprintf("%s/converged/", NS) - keyMap, err := obj.ComplexGet(path, true, etcd.WithPrefix()) // don't un-converge - if err != nil { - return nil, fmt.Errorf("converged values aren't available: %v", err) - } - converged := make(map[string]bool) - for key, val := range keyMap { // loop through directory... 
- if !strings.HasPrefix(key, path) { - continue - } - name := key[len(path):] // get name of key - if val == "" { // skip "erased" values - continue - } - b, err := strconv.ParseBool(val) - if err != nil { - return nil, fmt.Errorf("converged data format error: %v", err) - } - converged[name] = b // add to map - } - return converged, nil -} - -// AddHostnameConvergedWatcher adds a watcher with a callback that runs on -// hostname state changes. -func AddHostnameConvergedWatcher(obj *EmbdEtcd, callbackFn func(map[string]bool) error) (func(), error) { - path := fmt.Sprintf("%s/converged/", NS) - internalCbFn := func(re *RE) error { - // TODO: get the value from the response, and apply delta... - // for now, just run a get operation which is easier to code! - m, err := HostnameConverged(obj) - if err != nil { - return err - } - return callbackFn(m) // call my function - } - return obj.AddWatcher(path, internalCbFn, true, true, etcd.WithPrefix()) // no block and no converger reset -} - -// SetClusterSize sets the ideal target cluster size of etcd peers. -func SetClusterSize(obj *EmbdEtcd, value uint16) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: SetClusterSize(): %v", value) - defer log.Printf("Trace: Etcd: SetClusterSize(): Finished!") - } - key := fmt.Sprintf("%s/idealClusterSize", NS) - - if err := obj.Set(key, strconv.FormatUint(uint64(value), 10)); err != nil { - return fmt.Errorf("function SetClusterSize failed: %v", err) // exit in progress? - } - return nil -} - -// GetClusterSize gets the ideal target cluster size of etcd peers. -func GetClusterSize(obj *EmbdEtcd) (uint16, error) { - key := fmt.Sprintf("%s/idealClusterSize", NS) - keyMap, err := obj.Get(key) - if err != nil { - return 0, fmt.Errorf("function GetClusterSize failed: %v", err) - } - - val, exists := keyMap[key] - if !exists || val == "" { - return 0, fmt.Errorf("function GetClusterSize failed: %v", err) - } - - v, err := strconv.ParseUint(val, 10, 16) - if err != nil { - return 0, fmt.Errorf("function GetClusterSize failed: %v", err) - } - return uint16(v), nil -} - -// MemberAdd adds a member to the cluster. -func MemberAdd(obj *EmbdEtcd, peerURLs etcdtypes.URLs) (*etcd.MemberAddResponse, error) { - //obj.Connect(false) // TODO: ? - ctx := context.Background() - var response *etcd.MemberAddResponse - var err error - for { - if obj.exiting { // the exit signal has been sent! - return nil, fmt.Errorf("exiting etcd") - } - obj.rLock.RLock() - response, err = obj.client.MemberAdd(ctx, peerURLs.StringSlice()) - obj.rLock.RUnlock() - if err == nil { - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return nil, err - } - } - return response, nil -} - -// MemberRemove removes a member by mID and returns if it worked, and also -// if there was an error. This is because it might have run without error, but -// the member wasn't found, for example. -func MemberRemove(obj *EmbdEtcd, mID uint64) (bool, error) { - //obj.Connect(false) // TODO: ? - ctx := context.Background() - for { - if obj.exiting { // the exit signal has been sent! - return false, fmt.Errorf("exiting etcd") - } - obj.rLock.RLock() - _, err := obj.client.MemberRemove(ctx, mID) - obj.rLock.RUnlock() - if err == nil { - break - } else if err == rpctypes.ErrMemberNotFound { - // if we get this, member already shut itself down :) - return false, nil - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return false, err - } - } - return true, nil -} - -// Members returns information on cluster membership. 
-// The member ID's are the keys, because an empty names means unstarted! -// TODO: consider queueing this through the main loop with CtxError(ctx, err) -func Members(obj *EmbdEtcd) (map[uint64]string, error) { - //obj.Connect(false) // TODO: ? - ctx := context.Background() - var response *etcd.MemberListResponse - var err error - for { - if obj.exiting { // the exit signal has been sent! - return nil, fmt.Errorf("exiting etcd") - } - obj.rLock.RLock() - if obj.flags.Trace { - log.Printf("Trace: Etcd: Members(): Endpoints are: %v", obj.client.Endpoints()) - } - response, err = obj.client.MemberList(ctx) - obj.rLock.RUnlock() - if err == nil { - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return nil, err - } - } - - members := make(map[uint64]string) - for _, x := range response.Members { - members[x.ID] = x.Name // x.Name will be "" if unstarted! - } - return members, nil -} - -// Leader returns the current leader of the etcd server cluster. -func Leader(obj *EmbdEtcd) (string, error) { - //obj.Connect(false) // TODO: ? - membersMap, err := Members(obj) - if err != nil { - return "", err - } - addresses := obj.LocalhostClientURLs() // heuristic, but probably correct - if len(addresses) == 0 { - // probably a programming error... - return "", fmt.Errorf("programming error") - } - endpoint := addresses[0].Host // FIXME: arbitrarily picked the first one - - // part two - ctx := context.Background() - var response *etcd.StatusResponse - for { - if obj.exiting { // the exit signal has been sent! - return "", fmt.Errorf("exiting etcd") - } - - obj.rLock.RLock() - response, err = obj.client.Maintenance.Status(ctx, endpoint) - obj.rLock.RUnlock() - if err == nil { - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return "", err - } - } - - // isLeader: response.Header.MemberId == response.Leader - for id, name := range membersMap { - if id == response.Leader { - return name, nil - } - } - return "", fmt.Errorf("members map is not current") // not found -} diff --git a/etcd/server.go b/etcd/server.go new file mode 100644 index 00000000..11371b09 --- /dev/null +++ b/etcd/server.go @@ -0,0 +1,309 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "fmt" + "os" + "path" + "strings" + "time" + + "github.com/purpleidea/mgmt/util" + "github.com/purpleidea/mgmt/util/errwrap" + + "github.com/coreos/etcd/embed" + etcdtypes "github.com/coreos/etcd/pkg/types" +) + +const ( + // MaxServerStartTimeout is the amount of time to wait for the server + // to start before considering it a failure. If you hit this timeout, + // let us know so that we can analyze the situation, and increase this + // if necessary. 
+	MaxServerStartTimeout = 60 * time.Second
+
+	// MaxServerCloseTimeout is the maximum amount of time we'll wait for
+	// the server to close down. If it exceeds this, it's probably a bug.
+	MaxServerCloseTimeout = 15 * time.Second
+
+	// MaxServerRetries is the maximum number of times we can try to restart
+	// the server if it fails on startup. This can help work around some
+	// timing bugs in etcd.
+	MaxServerRetries = 5
+
+	// ServerRetryWait is the amount of time to wait between retries.
+	ServerRetryWait = 500 * time.Millisecond
+)
+
+// serverAction represents the desired server state.
+type serverAction uint8
+
+const (
+	serverActionStop serverAction = iota
+	serverActionStart
+)
+
+// serverAction returns whether we should perform the requested action. For
+// example, if we call it with serverActionStart and it returns true, it means
+// we asked if we should start the server, and the answer is yes.
+func (obj *EmbdEtcd) serverAction(action serverAction) bool {
+	// check if i have actually volunteered first of all...
+	if obj.NoServer || len(obj.ServerURLs) == 0 {
+		obj.Logf("inappropriately nominated, rogue or stale server?")
+		return false // no action
+	}
+
+	_, exists := obj.nominated[obj.Hostname] // am i nominated?
+
+	// if there are no other peers, we create a new server
+	// TODO: do we need an || len(obj.nominated) == 0 if we're the first?
+	newCluster := len(obj.nominated) == 1 && exists
+
+	switch action {
+	case serverActionStart:
+		// we start if...
+		return obj.server == nil && (exists || newCluster)
+
+	case serverActionStop:
+		// we stop if...
+		return obj.server != nil && !exists
+	}
+
+	return false // no action needed
+}
+
+// runServer kicks off a new embedded etcd server. It exits when the server
+// shuts down. The exit can be triggered at any time by running destroyServer,
+// or if it exits due to some condition like an error.
+// FIXME: should peerURLsMap just use obj.nominated instead?
+func (obj *EmbdEtcd) runServer(newCluster bool, peerURLsMap etcdtypes.URLsMap) (reterr error) {
+	obj.Logf("server: runServer: (newCluster=%t): %+v", newCluster, peerURLsMap)
+	defer obj.Logf("server: runServer: done!")
+	//obj.serverwg.Wait() // bonus, but instead, a mutex would be race free!
+	obj.serverwg.Add(1)
+	defer obj.serverwg.Done()
+	defer obj.serverExitsSignal.Send()
+	dataDir := fmt.Sprintf("%s/", path.Join(obj.Prefix, "server"))
+	if err := os.MkdirAll(dataDir, 0770); err != nil {
+		return errwrap.Wrapf(err, "couldn't mkdir: %s", dataDir)
+	}
+
+	memberName := obj.Hostname
+
+	// if no peer URLs exist, then starting a server is mostly only for some
+	// testing, but etcd doesn't allow the value to be empty so we use this!
+	peerURLs, err := etcdtypes.NewURLs([]string{"http://localhost:0"})
+	if err != nil {
+		return errwrap.Wrapf(err, "invalid URLs")
+	}
+	if len(obj.ServerURLs) > 0 {
+		peerURLs = obj.ServerURLs
+	}
+	initialPeerURLsMap, err := copyURLsMap(peerURLsMap)
+	if err != nil {
+		return errwrap.Wrapf(err, "error copying URLsMap")
+	}
+	// add self to list if it's not already in there...
+	if _, exists := peerURLsMap[memberName]; !exists {
+		initialPeerURLsMap[memberName] = peerURLs
+	}
+
+	// TODO: do we need to copy?
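+	// The advertised (A*) URLs default to the listen URLs below, and are
+	// only overridden when explicit advertise URLs were provided.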
+ aPUrls := peerURLs + if len(obj.AServerURLs) > 0 { + aPUrls = obj.AServerURLs + } + // NOTE: this logic is similar to obj.curls() + aCUrls := obj.ClientURLs + if len(obj.AClientURLs) > 0 { + aCUrls = obj.AClientURLs + } + + // embed etcd + cfg := embed.NewConfig() + cfg.Name = memberName // hostname + cfg.Dir = dataDir + cfg.LPUrls = peerURLs + cfg.LCUrls = obj.ClientURLs + cfg.APUrls = aPUrls + cfg.ACUrls = aCUrls + cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305 + cfg.MaxTxnOps = DefaultMaxTxnOps + + cfg.InitialCluster = initialPeerURLsMap.String() // including myself! + if newCluster { + cfg.ClusterState = embed.ClusterStateFlagNew + } else { + cfg.ClusterState = embed.ClusterStateFlagExisting + } + //cfg.ForceNewCluster = newCluster // TODO: ? + + if err := cfg.Validate(); err != nil { + return errwrap.Wrapf(err, "server config is invalid") + } + + obj.Logf("server: starting...") + // TODO: etcd panics with: `create wal error: no space left on device` + // see: https://github.com/etcd-io/etcd/issues/10588 + defer func() { + if r := recover(); r != nil { // magic panic catcher + obj.Logf("server: panic: %s", r) + reterr = fmt.Errorf("panic during start with: %s", r) // set named return err + } + }() + // XXX: workaround: https://github.com/etcd-io/etcd/issues/10626 + // This runs when we see the nominate operation. This could also error + // if this races to start up, and happens before the member add runs. + count := 0 + for { + obj.server, err = embed.StartEtcd(cfg) + if err == nil { + break + } + e := err.Error() + // catch: error validating peerURLs ... member count is unequal + if strings.HasPrefix(e, "error validating peerURLs") && strings.HasSuffix(e, "member count is unequal") { + count++ + if count > MaxServerRetries { + err = errwrap.Wrapf(err, "workaround retries (%d) exceeded", MaxServerRetries) + break + } + obj.Logf("waiting %s for retry", ServerRetryWait.String()) + time.Sleep(ServerRetryWait) + continue + } + break + } + defer func() { + obj.server = nil // important because this is used as an isRunning flag + }() + if err != nil { + // early debug logs in case something downstream blocks + if obj.Debug { + obj.Logf("server failing with: %+v", err) + } + return errwrap.Wrapf(err, "server start failed") + } + + closedChan := make(chan struct{}) + defer func() { + select { + case <-time.After(MaxServerCloseTimeout): + obj.Logf("server: close timeout of %s reached", MaxServerCloseTimeout.String()) + case <-closedChan: + } + }() + defer func() { + // no wg here, since we want to let it die on exit if need be... + // XXX: workaround: https://github.com/etcd-io/etcd/issues/10600 + go func() { + obj.server.Close() // this blocks until server has stopped + close(closedChan) // woo! + }() + }() + defer obj.server.Server.Stop() // trigger a shutdown + + select { + case <-obj.server.Server.ReadyNotify(): // we hang here if things are bad + obj.Logf("server: ready") // it didn't hang! + + // TODO: should we wait for this notification elsewhere? + case <-obj.server.Server.StopNotify(): // it's going down now... 
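+		// The server began shutting down before it ever became ready,
+		// so treat this as a failed startup instead of blocking forever.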
+ err := fmt.Errorf("received stop notification") + obj.Logf("server: stopped: %v", err) + return err + + case <-time.After(MaxServerStartTimeout): + err := fmt.Errorf("start timeout of %s reached", MaxServerStartTimeout.String()) + obj.Logf("server: %v", err) + return err + } + + obj.serverID = uint64(obj.server.Server.ID()) // store member id for internal use + defer func() { + obj.serverID = 0 // reset + }() + obj.addSelfState() // add to endpoints list so self client can connect! + obj.setEndpoints() // sync client with new endpoints + defer obj.setEndpoints() + defer obj.rmMemberState(obj.Hostname) + + obj.serverReadySignal.Send() // send a signal, and then reset the signal + + for { + select { + case err, ok := <-obj.server.Err(): + if !ok { // server shut down + return errwrap.Wrapf(err, "server shutdown error") + } + + case <-obj.serverExit.Signal(): + return errwrap.Wrapf(obj.serverExit.Error(), "server signal exit") + } + } + + //return nil // unreachable +} + +// destroyServer shuts down the embedded etcd server portion. +func (obj *EmbdEtcd) destroyServer() error { + // This function must be thread-safe because a destroy request will + // cause runServer to return, which then runs the defer of this function + // which is meant to clean up when an independent, normal runServer + // return happens. Add the mutex to protect against races on this call. + obj.servermu.Lock() + defer obj.servermu.Unlock() + if obj.server == nil { + return nil // don't error on redundant calls + } + obj.Logf("server: destroyServer...") + defer obj.Logf("server: destroyServer: done!") + + obj.serverExit.Done(nil) // trigger an exit + + obj.serverwg.Wait() // wait for server to finish shutting down + defer func() { + obj.serverExit = util.NewEasyExit() // reset + }() + return obj.serverExit.Error() +} + +// ServerReady returns a channel that closes when we're up and running. This +// process happens when calling runServer. If runServer is never called, this +// will never happen. It also returns a cancel/ack function which must be called +// once the signal is received or we are done watching it. This is because this +// is a cyclical signal which happens, and then gets reset as the server starts +// up, shuts down, and repeats the cycle. The cancel/ack function ensures that +// we only watch a signal when it's ready to be read, and only reset it when we +// are done watching it. +func (obj *EmbdEtcd) ServerReady() (<-chan struct{}, func()) { + return obj.serverReadySignal.Subscribe() +} + +// ServerExited returns a channel that closes when the server is destroyed. This +// process happens after runServer exits. If runServer is never called, this +// will never happen. It also returns a cancel/ack function which must be called +// once the signal is received or we are done watching it. This is because this +// is a cyclical signal which happens, and then gets reset as the server starts +// up, shuts down, and repeats the cycle. The cancel/ack function ensures that +// we only watch a signal when it's ready to be read, and only reset it when we +// are done watching it. 
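+//
+// A hypothetical consumer looks something like:
+//
+//	ch, ack := etcdObj.ServerExited()
+//	select {
+//	case <-ch: // the server was destroyed
+//	}
+//	ack() // acknowledge, so the signal can reset for the next cycle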
+func (obj *EmbdEtcd) ServerExited() (<-chan struct{}, func()) {
+	return obj.serverExitsSignal.Subscribe()
+}
diff --git a/etcd/tasks.go b/etcd/tasks.go
new file mode 100644
index 00000000..15399022
--- /dev/null
+++ b/etcd/tasks.go
@@ -0,0 +1,163 @@
+// Mgmt
+// Copyright (C) 2013-2019+ James Shubin and the project contributors
+// Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package etcd
+
+import (
+	"fmt"
+
+	"github.com/purpleidea/mgmt/util/errwrap"
+)
+
+// task represents a single task to run. These are useful for pending work that
+// we want to schedule, but that shouldn't permanently error out the system on
+// failure. In particular, idempotent tasks that are safe are ideal for this
+// queue. The tasks can be added with queueTask.
+type task struct {
+	name   string       // name of task
+	fn     func() error // task to run
+	retry  int          // number of times to retry on error, -1 for infinite
+	block  bool         // should we block the queue until this succeeds?
+	report bool         // should we report the error on permanent failure?
+}
+
+// String returns a string representation of the struct.
+func (obj *task) String() string {
+	return fmt.Sprintf("task(%s)", obj.name)
+}
+
+// queueTask adds a task to the task worker queue. If you want to specify any
+// properties that differ from the defaults, use queueRawTask instead.
+func (obj *EmbdEtcd) queueTask(fn func() error) error {
+	t := &task{
+		fn: fn,
+	}
+	return obj.queueRawTask(t) // this takes the queue lock itself
+}
+
+// queueRawTask adds a task of any format to the queue. You should not name your
+// task a string which could match a positive integer. Those names are used when
+// an unnamed task is specified and the system needs to generate a name.
+func (obj *EmbdEtcd) queueRawTask(t *task) error {
+	if obj.Debug {
+		obj.Logf("queueRawTask()")
+		defer obj.Logf("queueRawTask(): done!")
+	}
+
+	if t == nil {
+		return fmt.Errorf("nil task")
+	}
+
+	obj.taskQueueLock.Lock()
+	defer obj.taskQueueLock.Unlock()
+	if obj.taskQueue == nil { // killed signal
+		return fmt.Errorf("task queue killed")
+	}
+	if t.name == "" {
+		obj.taskQueueID++ // increment
+		t.name = fmt.Sprintf("%d", obj.taskQueueID)
+	}
+
+	obj.taskQueue = append(obj.taskQueue, t)
+	if !obj.taskQueueRunning {
+		obj.taskQueueRunning = true
+		obj.taskQueueWg.Add(1)
+		go obj.runTaskQueue()
+	}
+	return nil
+}
+
+// killTaskQueue empties the task queue, causing it to shut down.
+func (obj *EmbdEtcd) killTaskQueue() int {
+	obj.taskQueueLock.Lock()
+	count := len(obj.taskQueue)
+	obj.taskQueue = nil // clear queue
+	obj.taskQueueLock.Unlock()
+
+	obj.taskQueueWg.Wait()    // wait for queue to exit
+	obj.taskQueue = []*task{} // reset
+	return count              // number of tasks deleted
+}
+
+// runTaskQueue processes the task queue. This is started automatically by
+// queueRawTask if needed. It will shut itself down when the queue is empty.
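+//
+// Failed non-blocking tasks are re-appended to the back of the queue (subject
+// to their retry count) so later tasks aren't starved; failed blocking tasks
+// are retried in place and hold up the queue until they succeed or run out of
+// retries.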
+func (obj *EmbdEtcd) runTaskQueue() {
+	defer obj.taskQueueWg.Done() // added in queueRawTask
+	for {
+		obj.taskQueueLock.Lock()
+		if obj.taskQueue == nil || len(obj.taskQueue) == 0 {
+			defer obj.taskQueueLock.Unlock()
+			obj.taskQueueRunning = false
+			return
+		}
+		var t *task
+		t, obj.taskQueue = obj.taskQueue[0], obj.taskQueue[1:]
+		obj.taskQueueLock.Unlock()
+
+		if !t.block {
+			if obj.Debug {
+				obj.Logf("%s: run...", t)
+			}
+			err := t.fn()
+			if obj.Debug {
+				obj.Logf("%s: done: %v", t, err)
+			}
+			if err != nil {
+				if t.retry == 0 {
+					if t.report {
+						// send a permanent error
+						// XXX: guard errChan for early close... hmmm
+						select {
+						case obj.errChan <- errwrap.Wrapf(err, "task error"):
+						}
+					}
+					continue
+				}
+				if t.retry > 0 { // don't decrement from -1
+					t.retry--
+				}
+				obj.taskQueueLock.Lock()
+				if obj.taskQueue != nil { // killed signal
+					obj.taskQueue = append(obj.taskQueue, t)
+				}
+				obj.taskQueueLock.Unlock()
+			}
+			continue
+		}
+
+		// block
+		for {
+			if obj.Debug {
+				obj.Logf("%s: run...", t)
+			}
+			err := t.fn()
+			if obj.Debug {
+				obj.Logf("%s: done: %v", t, err)
+			}
+			if err == nil {
+				break // success, unblock the queue!
+			}
+			if t.retry == 0 {
+				break // give up
+			}
+			if t.retry > 0 { // don't decrement from -1
+				t.retry--
+			}
+		}
+	}
+}
diff --git a/etcd/util.go b/etcd/util.go
new file mode 100644
index 00000000..63a0d643
--- /dev/null
+++ b/etcd/util.go
@@ -0,0 +1,173 @@
+// Mgmt
+// Copyright (C) 2013-2019+ James Shubin and the project contributors
+// Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package etcd
+
+// TODO: move to sub-package if this expands in utility or is used elsewhere...
+
+import (
+	"fmt"
+	"net/url"
+	"strings"
+
+	"github.com/purpleidea/mgmt/util/errwrap"
+
+	etcdtypes "github.com/coreos/etcd/pkg/types"
+)
+
+// copyURL copies a URL.
+// TODO: submit this upstream to etcd ?
+func copyURL(u *url.URL) (*url.URL, error) {
+	if u == nil {
+		return nil, fmt.Errorf("empty URL specified")
+	}
+	return url.Parse(u.String()) // copy it
+}
+
+// copyURLs copies a URLs.
+// TODO: submit this upstream to etcd ?
+func copyURLs(urls etcdtypes.URLs) (etcdtypes.URLs, error) {
+	out := []url.URL{}
+	for _, x := range urls {
+		u, err := copyURL(&x)
+		if err != nil {
+			return nil, err
+		}
+		out = append(out, *u)
+	}
+	return out, nil
+}
+
+// copyURLsMap copies a URLsMap.
+// TODO: submit this upstream to etcd ?
+func copyURLsMap(urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
+	out := make(etcdtypes.URLsMap)
+	for k, v := range urlsMap {
+		urls, err := copyURLs(v)
+		if err != nil {
+			return nil, err
+		}
+		out[k] = urls
+	}
+	return out, nil
+}
+
+// cmpURLs compares two URLs, and returns nil if they are the same.
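+//
+// For example (illustrative values):
+//
+//	u1, _ := etcdtypes.NewURLs([]string{"http://127.0.0.1:2379"})
+//	u2, _ := etcdtypes.NewURLs([]string{"http://127.0.0.1:2379"})
+//	if err := cmpURLs(u1, u2); err != nil {
+//		// they differ...
+//	}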
+func cmpURLs(u1, u2 etcdtypes.URLs) error { + if (u1 == nil) != (u2 == nil) { // xor + return fmt.Errorf("lists differ") + } + if len(u1) != len(u2) { + return fmt.Errorf("length of lists is not the same") + } + + for i, v1 := range u1 { + if v1 != u2[i] { + return fmt.Errorf("index %d differs", i) + } + } + + return nil +} + +// cmpURLsMap compares two URLsMap's, and returns nil if they are the same. +func cmpURLsMap(m1, m2 etcdtypes.URLsMap) error { + if (m1 == nil) != (m2 == nil) { // xor + return fmt.Errorf("maps differ") + } + if len(m1) != len(m2) { + return fmt.Errorf("length of maps is not the same") + } + + for k, v1 := range m1 { + v2, exists := m2[k] + if !exists { + return fmt.Errorf("key `%s` not found in map 2", k) + } + if err := cmpURLs(v1, v2); err != nil { + return errwrap.Wrapf(err, "values at key `%s` differ", k) + } + } + + return nil +} + +// newURLsMap is a helper to build a new URLsMap without having to import the +// messy etcdtypes package. +func newURLsMap() etcdtypes.URLsMap { + return make(etcdtypes.URLsMap) +} + +func fromURLsToStringList(urls etcdtypes.URLs) []string { + result := []string{} + for _, u := range urls { // flatten map + result = append(result, u.String()) // use full url including scheme + } + return result +} + +// fromURLsMapToStringList flattens a map of URLs into a single string list. +// Remember to sort the result if you want it to be deterministic! +func fromURLsMapToStringList(m etcdtypes.URLsMap) []string { + result := []string{} + for _, x := range m { // flatten map + for _, u := range x { + result = append(result, u.String()) // use full url including scheme + } + } + return result +} + +// validateURLsMap checks if each embedded URL is parseable correctly. +//func validateURLsMap(urlsMap etcdtypes.URLsMap) error { +// _, err := copyURLsMap(urlsMap) // would fail if anything didn't parse +// return err +//} + +// localhostURLs returns the most localhost like URLs for direct connection. +// This gets clients to talk to the local servers first before looking remotely. +// TODO: improve this algorithm as it's currently a bad heuristic +func localhostURLs(urls etcdtypes.URLs) etcdtypes.URLs { + out := etcdtypes.URLs{} + for _, u := range urls { + // "localhost" or anything in 127.0.0.0/8 is valid! + if strings.HasPrefix(u.Host, "localhost") || strings.HasPrefix(u.Host, "127.") { + out = append(out, u) + continue + } + // or ipv6 localhost + // TODO: are there others to add here? + if strings.HasPrefix(u.Host, "[::1]") { + out = append(out, u) + continue + } + // or local unix domain sockets + if u.Scheme == "unix" { + out = append(out, u) + continue + } + } + return out +} + +//func urlRemoveScheme(urls etcdtypes.URLs) []string { +// strs := []string{} +// for _, u := range urls { +// strs = append(strs, u.Host) // remove http:// prefix +// } +// return strs +//} diff --git a/etcd/util_test.go b/etcd/util_test.go new file mode 100644 index 00000000..5621e570 --- /dev/null +++ b/etcd/util_test.go @@ -0,0 +1,189 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +// +build !root + +package etcd + +import ( + "net/url" + "testing" +) + +func TestCopyURL0(t *testing.T) { + // list of urls to test + strs := []string{ + "", + "http://192.168.13.42:2379", + "https://192.168.13.42:2380", + "http://192.168.13.42", + "https://192.168.13.42", + } + for _, str := range strs { + t.Logf("testing: `%s`", str) + u1, err := url.Parse(str) + if err != nil { + t.Errorf("url did not parse: %+v", err) + continue + } + + u2, err := copyURL(u1) + if err != nil { + t.Errorf("url did not copy: %+v", err) + continue + } + + if s := u2.String(); s != str { + t.Errorf("url did not cmp, got: `%s`, expected: `%s`", s, str) + } + + // bonus test (add to separate lists of size one) + if err := cmpURLs([]url.URL{*u1}, []url.URL{*u2}); err != nil { + t.Errorf("urls did not cmp, err: %+v", err) + } + } +} + +func TestCopyURLs0(t *testing.T) { + // list of urls lists to test + nstrs := [][]string{ + {}, // empty! + { + "http://192.168.13.42:2379", + "https://192.168.13.42:2380", + "http://192.168.13.42", + "https://192.168.13.42", + }, + { + "http://192.168.42.42:2379", + "https://192.168.13.42:2380", + "http://192.168.99.42", + "https://10.10.1.255", + }, + { + "http://example.com:2379", + "https://purpleidea.com/:2379", + "http://192.168.13.42", + "https://192.168.13.42", + }, + } + for _, strs := range nstrs { + t.Logf("testing: `%s`", strs) + + urls1 := []url.URL{} + for _, str := range strs { + u, err := url.Parse(str) + if err != nil { + t.Errorf("url did not parse: %+v", err) + continue + } + urls1 = append(urls1, *u) + } + + urls2, err := copyURLs(urls1) + if err != nil { + t.Errorf("urls did not copy: %+v", err) + continue + } + + if err := cmpURLs(urls1, urls2); err != nil { + t.Errorf("urls did not cmp, err: %+v", err) + } + } +} + +func TestCopyURLsMap0(t *testing.T) { + // list of urls lists to test + nmstrs := []map[string][]string{ + {}, // empty! + { + "h1": []string{}, // empty + "h2": []string{}, // empty + "h3": []string{}, // empty + }, + { + "h1": []string{}, // empty + "h2": nil, // nil ! + "h3": []string{}, // empty + }, + { + "h1": []string{}, // empty + "h2": []string{ + "http://example.com:2379", + "https://purpleidea.com/:2379", + "http://192.168.13.42", + "https://192.168.13.42", + }, + }, + { + "h1": []string{ + "http://192.168.13.42:2379", + "https://192.168.13.42:2380", + "http://192.168.13.42", + "https://192.168.13.42", + }, + "h2": []string{ + "http://example.com:2379", + "https://purpleidea.com/:2379", + "http://192.168.13.42", + "https://192.168.13.42", + }, + }, + { + "h1": []string{ + "http://192.168.13.42:2379", + "https://192.168.13.42:2380", + "http://192.168.13.42", + "https://192.168.13.42", + }, + "h2": nil, // nil ! 
+ "h3": []string{ + "http://example.com:2379", + "https://purpleidea.com/:2379", + "http://192.168.13.42", + "https://192.168.13.42", + }, + }, + } + + for _, mstrs := range nmstrs { + t.Logf("testing: `%s`", mstrs) + urlsMap1 := newURLsMap() + for key, strs := range mstrs { + urls := []url.URL{} + for _, str := range strs { + u, err := url.Parse(str) + if err != nil { + t.Errorf("url did not parse: %+v", err) + continue + } + urls = append(urls, *u) + } + urlsMap1[key] = urls + } + + urlsMap2, err := copyURLsMap(urlsMap1) + if err != nil { + t.Errorf("urlsMap did not copy: %+v", err) + continue + } + + if err := cmpURLsMap(urlsMap1, urlsMap2); err != nil { + t.Errorf("urlsMap did not cmp, err: %+v", err) + } + } +} diff --git a/etcd/world.go b/etcd/world.go index 0240bcab..3ec9099b 100644 --- a/etcd/world.go +++ b/etcd/world.go @@ -18,19 +18,27 @@ package etcd import ( + "context" "fmt" "net/url" "strings" "github.com/purpleidea/mgmt/engine" + "github.com/purpleidea/mgmt/etcd/chooser" + "github.com/purpleidea/mgmt/etcd/client" + "github.com/purpleidea/mgmt/etcd/client/resources" + "github.com/purpleidea/mgmt/etcd/client/str" + "github.com/purpleidea/mgmt/etcd/client/strmap" etcdfs "github.com/purpleidea/mgmt/etcd/fs" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/etcd/scheduler" + "github.com/purpleidea/mgmt/util" ) // World is an etcd backed implementation of the World interface. type World struct { Hostname string // uuid for the consumer of these - EmbdEtcd *EmbdEtcd + Client interfaces.Client MetadataPrefix string // expected metadata prefix StoragePrefix string // storage prefix for etcdfs storage StandaloneFs engine.Fs // store an fs here for local usage @@ -40,72 +48,113 @@ type World struct { // ResWatch returns a channel which spits out events on possible exported // resource changes. -func (obj *World) ResWatch() chan error { - return WatchResources(obj.EmbdEtcd) +func (obj *World) ResWatch(ctx context.Context) (chan error, error) { + return resources.WatchResources(ctx, obj.Client) } // ResExport exports a list of resources under our hostname namespace. // Subsequent calls replace the previously set collection atomically. -func (obj *World) ResExport(resourceList []engine.Res) error { - return SetResources(obj.EmbdEtcd, obj.Hostname, resourceList) +func (obj *World) ResExport(ctx context.Context, resourceList []engine.Res) error { + return resources.SetResources(ctx, obj.Client, obj.Hostname, resourceList) } // ResCollect gets the collection of exported resources which match the filter. // It does this atomically so that a call always returns a complete collection. -func (obj *World) ResCollect(hostnameFilter, kindFilter []string) ([]engine.Res, error) { +func (obj *World) ResCollect(ctx context.Context, hostnameFilter, kindFilter []string) ([]engine.Res, error) { // XXX: should we be restricted to retrieving resources that were // exported with a tag that allows or restricts our hostname? We could // enforce that here if the underlying API supported it... Add this? - return GetResources(obj.EmbdEtcd, hostnameFilter, kindFilter) + return resources.GetResources(ctx, obj.Client, hostnameFilter, kindFilter) +} + +// IdealClusterSizeWatch returns a stream of errors anytime the cluster-wide +// dynamic cluster size setpoint changes. 
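+//
+// A hypothetical caller, which owns ctx and an attached wait group wg:
+//
+//	ch, err := world.IdealClusterSizeWatch(util.CtxWithWg(ctx, wg))
+//	if err != nil {
+//		return err
+//	}
+//	for range ch { // one event per setpoint change
+//		// re-read the setpoint with IdealClusterSizeGet...
+//	}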
+func (obj *World) IdealClusterSizeWatch(ctx context.Context) (chan error, error) {
+	c := client.NewClientFromSimple(obj.Client, ChooserPath)
+	if err := c.Init(); err != nil {
+		return nil, err
+	}
+	util.WgFromCtx(ctx).Add(1)
+	go func() {
+		defer util.WgFromCtx(ctx).Done()
+		// The deferred Close is registered *after* the Done above so that
+		// it runs first: Close will not finish until the Watcher returns,
+		// because it contains a wg.Wait() in it...
+		defer c.Close() // ignore error
+		select {
+		case <-ctx.Done():
+		}
+	}()
+	return c.Watcher(ctx, chooser.IdealDynamicSizePath)
+}
+
+// IdealClusterSizeGet gets the cluster-wide dynamic cluster size setpoint.
+func (obj *World) IdealClusterSizeGet(ctx context.Context) (uint16, error) {
+	c := client.NewClientFromSimple(obj.Client, ChooserPath)
+	if err := c.Init(); err != nil {
+		return 0, err
+	}
+	defer c.Close() // ignore error
+	return chooser.DynamicSizeGet(ctx, c) // use client with added namespace
+}
+
+// IdealClusterSizeSet sets the cluster-wide dynamic cluster size setpoint.
+func (obj *World) IdealClusterSizeSet(ctx context.Context, size uint16) (bool, error) {
+	c := client.NewClientFromSimple(obj.Client, ChooserPath)
+	if err := c.Init(); err != nil {
+		return false, err
+	}
+	defer c.Close() // ignore error
+	return chooser.DynamicSizeSet(ctx, c, size)
+}
+
 // StrWatch returns a channel which spits out events on possible string changes.
-func (obj *World) StrWatch(namespace string) chan error {
-	return WatchStr(obj.EmbdEtcd, namespace)
+func (obj *World) StrWatch(ctx context.Context, namespace string) (chan error, error) {
+	return str.WatchStr(ctx, obj.Client, namespace)
 }
 
 // StrIsNotExist returns whether the error from StrGet is a key missing error.
 func (obj *World) StrIsNotExist(err error) bool {
-	return err == ErrNotExist
+	return err == interfaces.ErrNotExist
 }
 
 // StrGet returns the value for the given namespace.
-func (obj *World) StrGet(namespace string) (string, error) {
-	return GetStr(obj.EmbdEtcd, namespace)
+func (obj *World) StrGet(ctx context.Context, namespace string) (string, error) {
+	return str.GetStr(ctx, obj.Client, namespace)
 }
 
 // StrSet sets the namespace value to a particular string.
-func (obj *World) StrSet(namespace, value string) error {
-	return SetStr(obj.EmbdEtcd, namespace, &value)
+func (obj *World) StrSet(ctx context.Context, namespace, value string) error {
+	return str.SetStr(ctx, obj.Client, namespace, &value)
 }
 
 // StrDel deletes the value in a particular namespace.
-func (obj *World) StrDel(namespace string) error {
-	return SetStr(obj.EmbdEtcd, namespace, nil)
+func (obj *World) StrDel(ctx context.Context, namespace string) error {
+	return str.SetStr(ctx, obj.Client, namespace, nil)
 }
 
 // StrMapWatch returns a channel which spits out events on possible string changes.
-func (obj *World) StrMapWatch(namespace string) chan error {
-	return WatchStrMap(obj.EmbdEtcd, namespace)
+func (obj *World) StrMapWatch(ctx context.Context, namespace string) (chan error, error) {
+	return strmap.WatchStrMap(ctx, obj.Client, namespace)
 }
 
 // StrMapGet returns a map of hostnames to values in the given namespace.
-func (obj *World) StrMapGet(namespace string) (map[string]string, error) {
-	return GetStrMap(obj.EmbdEtcd, []string{}, namespace)
+func (obj *World) StrMapGet(ctx context.Context, namespace string) (map[string]string, error) {
+	return strmap.GetStrMap(ctx, obj.Client, []string{}, namespace)
 }
 
 // StrMapSet sets the namespace value to a particular string under the identity
 // of its own hostname.
-func (obj *World) StrMapSet(namespace, value string) error { - return SetStrMap(obj.EmbdEtcd, obj.Hostname, namespace, &value) +func (obj *World) StrMapSet(ctx context.Context, namespace, value string) error { + return strmap.SetStrMap(ctx, obj.Client, obj.Hostname, namespace, &value) } // StrMapDel deletes the value in a particular namespace. -func (obj *World) StrMapDel(namespace string) error { - return SetStrMap(obj.EmbdEtcd, obj.Hostname, namespace, nil) +func (obj *World) StrMapDel(ctx context.Context, namespace string) error { + return strmap.SetStrMap(ctx, obj.Client, obj.Hostname, namespace, nil) } // Scheduler returns a scheduling result of hosts in a particular namespace. +// XXX: Add a context.Context here func (obj *World) Scheduler(namespace string, opts ...scheduler.Option) (*scheduler.Result, error) { modifiedOpts := []scheduler.Option{} for _, o := range opts { @@ -115,7 +164,8 @@ func (obj *World) Scheduler(namespace string, opts ...scheduler.Option) (*schedu modifiedOpts = append(modifiedOpts, scheduler.Debug(obj.Debug)) modifiedOpts = append(modifiedOpts, scheduler.Logf(obj.Logf)) - return scheduler.Schedule(obj.EmbdEtcd.GetClient(), fmt.Sprintf("%s/scheduler/%s", NS, namespace), obj.Hostname, modifiedOpts...) + path := fmt.Sprintf(schedulerPathFmt, namespace) + return scheduler.Schedule(obj.Client.GetClient(), path, obj.Hostname, modifiedOpts...) } // Fs returns a distributed file system from a unique URI. For single host @@ -144,9 +194,14 @@ func (obj *World) Fs(uri string) (engine.Fs, error) { } etcdFs := &etcdfs.Fs{ - Client: obj.EmbdEtcd.GetClient(), + Client: obj.Client, // TODO: do we need to add a namespace? Metadata: u.Path, DataPrefix: obj.StoragePrefix, + + Debug: obj.Debug, + Logf: func(format string, v ...interface{}) { + obj.Logf("fs: "+format, v...) 
+ }, } return etcdFs, nil } diff --git a/examples/lang/etcd-config0.mcl b/examples/lang/etcd-config0.mcl new file mode 100644 index 00000000..5f4cfde4 --- /dev/null +++ b/examples/lang/etcd-config0.mcl @@ -0,0 +1,4 @@ +# sets a cluster parameter, safe to be called identically from multiple machines +config:etcd "whatever" { + idealclustersize => 7, +} diff --git a/examples/lang/exchange0.mcl b/examples/lang/exchange0.mcl index a6d1f3bf..9e855ce6 100644 --- a/examples/lang/exchange0.mcl +++ b/examples/lang/exchange0.mcl @@ -1,9 +1,10 @@ # run this example with these commands # watch -n 0.1 'tail *' # run this in /tmp/mgmt/ -# time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl +# time ./mgmt run --hostname h1 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty +# time ./mgmt deploy --no-git --seeds http://127.0.0.1:2379 lang --lang examples/lang/exchange0.mcl import "sys" import "world" diff --git a/lang/funcs/core/world/exchange_func.go b/lang/funcs/core/world/exchange_func.go index bb1af6ad..ce94c66d 100644 --- a/lang/funcs/core/world/exchange_func.go +++ b/lang/funcs/core/world/exchange_func.go @@ -18,6 +18,7 @@ package coreworld import ( + "context" "fmt" "github.com/purpleidea/mgmt/lang/funcs" @@ -75,6 +76,8 @@ func (obj *ExchangeFunc) Init(init *interfaces.Init) error { // Stream returns the changing values that this func has over time. func (obj *ExchangeFunc) Stream() error { defer close(obj.init.Output) // the sender closes + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() for { select { // TODO: should this first chan be run as a priority channel to @@ -105,8 +108,13 @@ func (obj *ExchangeFunc) Stream() error { // TODO: support changing the namespace over time... // TODO: possibly removing our stored value there first! 
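+			// the namespace is latched by the first value we receive;
+			// any later attempt to change it is rejected below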
if obj.namespace == "" { - obj.namespace = namespace // store it - obj.watchChan = obj.init.World.StrMapWatch(obj.namespace) // watch for var changes + obj.namespace = namespace // store it + var err error + obj.watchChan, err = obj.init.World.StrMapWatch(ctx, obj.namespace) // watch for var changes + if err != nil { + return err + } + } else if obj.namespace != namespace { return fmt.Errorf("can't change namespace, previously: `%s`", obj.namespace) } @@ -116,7 +124,7 @@ func (obj *ExchangeFunc) Stream() error { obj.init.Logf("value: %+v", value) } - if err := obj.init.World.StrMapSet(obj.namespace, value); err != nil { + if err := obj.init.World.StrMapSet(ctx, obj.namespace, value); err != nil { return errwrap.Wrapf(err, "namespace write error of `%s` to `%s`", value, obj.namespace) } @@ -134,7 +142,7 @@ func (obj *ExchangeFunc) Stream() error { return errwrap.Wrapf(err, "channel watch failed on `%s`", obj.namespace) } - keyMap, err := obj.init.World.StrMapGet(obj.namespace) + keyMap, err := obj.init.World.StrMapGet(ctx, obj.namespace) if err != nil { return errwrap.Wrapf(err, "channel read failed on `%s`", obj.namespace) } diff --git a/lang/funcs/core/world/kvlookup_func.go b/lang/funcs/core/world/kvlookup_func.go index ec0e8a11..03cc4771 100644 --- a/lang/funcs/core/world/kvlookup_func.go +++ b/lang/funcs/core/world/kvlookup_func.go @@ -18,6 +18,7 @@ package coreworld import ( + "context" "fmt" "github.com/purpleidea/mgmt/lang/funcs" @@ -73,6 +74,8 @@ func (obj *KVLookupFunc) Init(init *interfaces.Init) error { // Stream returns the changing values that this func has over time. func (obj *KVLookupFunc) Stream() error { defer close(obj.init.Output) // the sender closes + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() for { select { // TODO: should this first chan be run as a priority channel to @@ -103,10 +106,14 @@ func (obj *KVLookupFunc) Stream() error { // TODO: support changing the namespace over time... // TODO: possibly removing our stored value there first! if obj.namespace == "" { - obj.namespace = namespace // store it - obj.watchChan = obj.init.World.StrMapWatch(obj.namespace) // watch for var changes + obj.namespace = namespace // store it + var err error + obj.watchChan, err = obj.init.World.StrMapWatch(ctx, obj.namespace) // watch for var changes + if err != nil { + return err + } - result, err := obj.buildMap() // build the map... + result, err := obj.buildMap(ctx) // build the map... if err != nil { return err } @@ -135,7 +142,7 @@ func (obj *KVLookupFunc) Stream() error { return errwrap.Wrapf(err, "channel watch failed on `%s`", obj.namespace) } - result, err := obj.buildMap() // build the map... + result, err := obj.buildMap(ctx) // build the map... if err != nil { return err } @@ -166,8 +173,8 @@ func (obj *KVLookupFunc) Close() error { } // buildMap builds the result map which we'll need. It uses struct variables. -func (obj *KVLookupFunc) buildMap() (types.Value, error) { - keyMap, err := obj.init.World.StrMapGet(obj.namespace) +func (obj *KVLookupFunc) buildMap(ctx context.Context) (types.Value, error) { + keyMap, err := obj.init.World.StrMapGet(ctx, obj.namespace) if err != nil { return nil, errwrap.Wrapf(err, "channel read failed on `%s`", obj.namespace) } diff --git a/lang/funcs/core/world/schedule_func.go b/lang/funcs/core/world/schedule_func.go index 8bf5a7c2..feeb1e4d 100644 --- a/lang/funcs/core/world/schedule_func.go +++ b/lang/funcs/core/world/schedule_func.go @@ -16,7 +16,7 @@ // along with this program. If not, see . 
// test with: -// time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl +// time ./mgmt run --hostname h1 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl // time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl // time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl // kill h2 (should see h1 and h3 pick [h1, h3] instead) diff --git a/lib/deploy.go b/lib/deploy.go index 55b9d46c..0e0dcb26 100644 --- a/lib/deploy.go +++ b/lib/deploy.go @@ -18,11 +18,13 @@ package lib import ( + "context" "fmt" "log" "os" - "github.com/purpleidea/mgmt/etcd" + "github.com/purpleidea/mgmt/etcd/client" + "github.com/purpleidea/mgmt/etcd/deployer" etcdfs "github.com/purpleidea/mgmt/etcd/fs" "github.com/purpleidea/mgmt/gapi" "github.com/purpleidea/mgmt/util/errwrap" @@ -34,12 +36,13 @@ import ( const ( // MetadataPrefix is the etcd prefix where all our fs superblocks live. - MetadataPrefix = etcd.NS + "/fs" + MetadataPrefix = "/fs" // StoragePrefix is the etcd prefix where all our fs data lives. - StoragePrefix = etcd.NS + "/storage" + StoragePrefix = "/storage" ) // deploy is the cli target to manage deploys to our cluster. +// TODO: add a timeout and/or cancel signal to replace context.TODO() func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { cliContext := c.Parent() if cliContext == nil { @@ -55,7 +58,12 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { debug = flags.Debug } } + Logf := func(format string, v ...interface{}) { + log.Printf("deploy: "+format, v...) + } + hello(program, version, flags) // say hello! + defer Logf("goodbye!") var hash, pHash string if !cliContext.Bool("no-git") { @@ -74,7 +82,7 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { } hash = head.Hash().String() // current commit id - log.Printf("deploy: hash: %s", hash) + Logf("hash: %s", hash) lo := &git.LogOptions{ From: head.Hash(), @@ -90,7 +98,7 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { if err == nil { // errors are okay, we might be empty pHash = commit.Hash.String() // previous commit id } - log.Printf("deploy: previous deploy hash: %s", pHash) + Logf("previous deploy hash: %s", pHash) if cliContext.Bool("force") { pHash = "" // don't check this :( } @@ -101,28 +109,58 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { uniqueid := uuid.New() // panic's if it can't generate one :P - etcdClient := &etcd.ClientEtcd{ - Seeds: cliContext.StringSlice("seeds"), // endpoints + etcdClient := client.NewClientFromSeedsNamespace( + cliContext.StringSlice("seeds"), // endpoints + NS, + ) + if err := etcdClient.Init(); err != nil { + return errwrap.Wrapf(err, "client Init failed") } - if err := etcdClient.Connect(); err != nil { - return errwrap.Wrapf(err, "client connection error") + defer func() { + err := errwrap.Wrapf(etcdClient.Close(), "client Close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("client cleanup error: %+v", err) + } + }() + + simpleDeploy := &deployer.SimpleDeploy{ + Client: etcdClient, + Debug: debug, + Logf: func(format string, v ...interface{}) { + Logf("deploy: "+format, v...) 
+ }, } - defer etcdClient.Destroy() + if err := simpleDeploy.Init(); err != nil { + return errwrap.Wrapf(err, "deploy Init failed") + } + defer func() { + err := errwrap.Wrapf(simpleDeploy.Close(), "deploy Close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("deploy cleanup error: %+v", err) + } + }() // get max id (from all the previous deploys) - max, err := etcd.GetMaxDeployID(etcdClient) + max, err := simpleDeploy.GetMaxDeployID(context.TODO()) if err != nil { return errwrap.Wrapf(err, "error getting max deploy id") } // find the latest id var id = max + 1 // next id - log.Printf("deploy: max deploy id: %d", max) + Logf("previous max deploy id: %d", max) etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, // TODO: using a uuid is meant as a temporary measure, i hate them Metadata: MetadataPrefix + fmt.Sprintf("/deploy/%d-%s", id, uniqueid), DataPrefix: StoragePrefix, + + Debug: debug, + Logf: func(format string, v ...interface{}) { + Logf("fs: "+format, v...) + }, } cliInfo := &gapi.CliInfo{ @@ -154,9 +192,9 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { } // this nominally checks the previous git hash matches our expectation - if err := etcd.AddDeploy(etcdClient, id, hash, pHash, &str); err != nil { + if err := simpleDeploy.AddDeploy(context.TODO(), id, hash, pHash, &str); err != nil { return errwrap.Wrapf(err, "could not create deploy id `%d`", id) } - log.Printf("deploy: success, id: %d", id) + Logf("success, id: %d", id) return nil } diff --git a/lib/hello.go b/lib/hello.go index 9355dd23..2ebf16b9 100644 --- a/lib/hello.go +++ b/lib/hello.go @@ -37,6 +37,7 @@ func hello(program, version string, flags Flags) { log.SetFlags(logFlags) // un-hijack from capnslog... + // XXX: move this to the etcd package when new version deprecates capnslog log.SetOutput(os.Stderr) if flags.Verbose { capnslog.SetFormatter(capnslog.NewLogFormatter(os.Stderr, "(etcd) ", logFlags)) diff --git a/lib/main.go b/lib/main.go index a378c712..4686d133 100644 --- a/lib/main.go +++ b/lib/main.go @@ -18,6 +18,7 @@ package lib import ( + "context" "fmt" "io/ioutil" "log" @@ -33,6 +34,8 @@ import ( "github.com/purpleidea/mgmt/engine/graph/autogroup" _ "github.com/purpleidea/mgmt/engine/resources" // let register's run "github.com/purpleidea/mgmt/etcd" + "github.com/purpleidea/mgmt/etcd/chooser" + "github.com/purpleidea/mgmt/etcd/deployer" "github.com/purpleidea/mgmt/gapi" "github.com/purpleidea/mgmt/gapi/empty" "github.com/purpleidea/mgmt/pgp" @@ -44,10 +47,14 @@ import ( etcdtypes "github.com/coreos/etcd/pkg/types" ) +const ( + // NS is the root namespace for etcd operations. All keys must use it! + NS = "/_mgmt" // must not end with a slash! +) + // Flags are some constant flags which are used throughout the program. type Flags struct { Debug bool // add additional log messages - Trace bool // add execution flow log messages Verbose bool // add extra log message output } @@ -105,7 +112,8 @@ type Main struct { Prometheus bool // enable prometheus metrics PrometheusListen string // prometheus instance bind specification - ge *graph.Engine + embdEtcd *etcd.EmbdEtcd // TODO: can be an interface in the future... 
+ ge *graph.Engine exit *util.EasyExit // exit signal cleanup []func() error // list of functions to run on close @@ -140,7 +148,7 @@ func (obj *Main) Init() error { obj.idealClusterSize = uint16(obj.IdealClusterSize) if obj.IdealClusterSize < 0 { // value is undefined, set to the default - obj.idealClusterSize = etcd.DefaultIdealClusterSize + obj.idealClusterSize = chooser.DefaultIdealDynamicSize } if obj.idealClusterSize < 1 { @@ -194,7 +202,8 @@ func (obj *Main) Run() error { hello(obj.Program, obj.Version, obj.Flags) // say hello! defer Logf("goodbye!") - defer obj.exit.Done(nil) // ensure this gets called even if Exit doesn't + exitCtx := obj.exit.Context() // local exit signal + defer obj.exit.Done(nil) // ensure this gets called even if Exit doesn't hostname, err := os.Hostname() // a sensible default // allow passing in the hostname, instead of using the system setting @@ -243,13 +252,14 @@ func (obj *Main) Run() error { if err := prom.InitKindMetrics(engine.RegisteredResourcesNames()); err != nil { return errwrap.Wrapf(err, "can't initialize kind-specific prometheus metrics") } - obj.cleanup = append(obj.cleanup, func() error { + defer func() { Logf("prometheus: stopping instance") - if err := prom.Stop(); err != nil { - return errwrap.Wrapf(err, "the prometheus instance exited poorly") + err := errwrap.Wrapf(prom.Stop(), "the prometheus instance exited poorly") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("cleanup error: %+v", err) } - return nil - }) + }() } if !obj.NoPgp { @@ -296,6 +306,8 @@ func (obj *Main) Run() error { exitchan := make(chan struct{}) // exit on close wg := &sync.WaitGroup{} // waitgroup for inner loop & goroutines + defer wg.Wait() // wait in case we have an early exit + defer obj.exit.Done(nil) // trigger exit in case something blocks // exit after `max-runtime` seconds for no reason at all... if i := obj.MaxRuntime; i > 0 { @@ -335,63 +347,108 @@ func (obj *Main) Run() error { // XXX: should this be moved to later in the code? go converger.Run(true) // main loop for converger, true to start paused converger.Ready() // block until ready - obj.cleanup = append(obj.cleanup, func() error { + defer func() { // TODO: shutdown converger, but make sure that using it in a // still running embdEtcd struct doesn't block waiting on it... converger.Shutdown() - return nil - }) + }() // embedded etcd if len(obj.seeds) == 0 { - Logf("etcd: seeds: no seeds specified!") + Logf("no seeds specified!") } else { - Logf("etcd: seeds(%d): %+v", len(obj.seeds), obj.seeds) + Logf("seeds(%d): %+v", len(obj.seeds), obj.seeds) } - embdEtcd := etcd.NewEmbdEtcd( - hostname, - obj.seeds, - obj.clientURLs, - obj.serverURLs, - obj.advertiseClientURLs, - obj.advertiseServerURLs, - obj.NoServer, - obj.NoNetwork, - obj.idealClusterSize, - etcd.Flags{ - Debug: obj.Flags.Debug, - Trace: obj.Flags.Trace, - Verbose: obj.Flags.Verbose, - }, - prefix, - converger, - ) - if embdEtcd == nil { - return fmt.Errorf("etcd: creation failed") - } else if err := embdEtcd.Startup(); err != nil { // startup (returns when etcd main loop is running) - return errwrap.Wrapf(err, "etcd: startup failed") - } - obj.cleanup = append(obj.cleanup, func() error { - // cleanup etcd main loop last so it can process everything first - err := embdEtcd.Destroy() // shutdown and cleanup etcd - return errwrap.Wrapf(err, "etcd: exited poorly") - }) + obj.embdEtcd = &etcd.EmbdEtcd{ + Hostname: hostname, + Seeds: obj.seeds, - // wait for etcd server to be ready before continuing... 
- // XXX: this is wrong if we're not going to be a server! we'll block!!! - // select { - // case <-embdEtcd.ServerReady(): - // Logf("etcd: server: ready!") - // // pass - // case <-time.After(((etcd.MaxStartServerTimeout * etcd.MaxStartServerRetries) + 1) * time.Second): - // return fmt.Errorf("etcd: startup timeout") - // } - time.Sleep(1 * time.Second) // XXX: temporary workaround + ClientURLs: obj.clientURLs, + ServerURLs: obj.serverURLs, + AClientURLs: obj.advertiseClientURLs, + AServerURLs: obj.advertiseServerURLs, + + NoServer: obj.NoServer, + NoNetwork: obj.NoNetwork, + + Chooser: &chooser.DynamicSize{ + IdealClusterSize: obj.idealClusterSize, + }, + + Converger: converger, + + NS: NS, // namespace + Prefix: fmt.Sprintf("%s/", path.Join(prefix, "etcd")), + + Debug: obj.Flags.Debug, + Logf: func(format string, v ...interface{}) { + log.Printf("etcd: "+format, v...) + }, + } + if err := obj.embdEtcd.Init(); err != nil { + return errwrap.Wrapf(err, "etcd init failed") + } + defer func() { + // cleanup etcd main loop last so it can process everything first + err := errwrap.Wrapf(obj.embdEtcd.Close(), "etcd close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("cleanup error: %+v", err) + } + }() + + var etcdErr error + // don't add a wait group here, this is done in embdEtcd.Destroy() + go func() { + etcdErr = obj.embdEtcd.Run() // returns when it shuts down... + obj.exit.Done(errwrap.Wrapf(etcdErr, "etcd run failed")) // trigger exit + }() + // tell etcd to shutdown, blocks until done! + // TODO: handle/report error? + defer obj.embdEtcd.Destroy() + + // wait for etcd to be ready before continuing... + // TODO: do we need to add a timeout here? + select { + case <-obj.embdEtcd.Ready(): + Logf("etcd is ready!") + // pass + + case <-obj.embdEtcd.Exited(): + Logf("etcd was destroyed!") + err := fmt.Errorf("etcd was destroyed on startup") + if etcdErr != nil { + err = etcdErr + } + return err + } + // TODO: should getting a client from EmbdEtcd already come with the NS? + etcdClient, err := obj.embdEtcd.MakeClientFromNamespace(NS) + if err != nil { + return errwrap.Wrapf(err, "make Client failed") + } + simpleDeploy := &deployer.SimpleDeploy{ + Client: etcdClient, + Debug: obj.Flags.Debug, + Logf: func(format string, v ...interface{}) { + log.Printf("deploy: "+format, v...) + }, + } + if err := simpleDeploy.Init(); err != nil { + return errwrap.Wrapf(err, "deploy Init failed") + } + defer func() { + err := errwrap.Wrapf(simpleDeploy.Close(), "deploy Close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("cleanup error: %+v", err) + } + }() // implementation of the World API (alternatives can be substituted in) world := &etcd.World{ Hostname: hostname, - EmbdEtcd: embdEtcd, + Client: etcdClient, MetadataPrefix: MetadataPrefix, StoragePrefix: StoragePrefix, StandaloneFs: obj.DeployFs, // used for static deploys @@ -415,9 +472,16 @@ func (obj *Main) Run() error { } if err := obj.ge.Init(); err != nil { - return errwrap.Wrapf(err, "engine: creation failed") + return errwrap.Wrapf(err, "engine Init failed") } - // After this point, the inner "main loop" must run, so that the engine + defer func() { + err := errwrap.Wrapf(obj.ge.Close(), "engine Close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("cleanup error: %+v", err) + } + }() + // After this point, the inner "main loop" will run, so that the engine // can get closed with the deploy close via the deploy chan shutdown... 
// main loop logic starts here @@ -456,7 +520,7 @@ func (obj *Main) Run() error { obj.ge.Pause(false) } // must be paused before this is run - obj.ge.Close() + //obj.ge.Close() // run in defer instead return // this is the only place we exit } @@ -678,9 +742,10 @@ func (obj *Main) Run() error { // get max id (from all the previous deploys) // this is what the existing cluster is already running - // TODO: can this block since we didn't deploy yet? - max, err := etcd.GetMaxDeployID(embdEtcd) + // TODO: add a timeout to context? + max, err := simpleDeploy.GetMaxDeployID(exitCtx) if err != nil { + close(deployChan) // because we won't close it downstream... return errwrap.Wrapf(err, "error getting max deploy id") } @@ -710,9 +775,24 @@ func (obj *Main) Run() error { // now we can wait for future deploys, but if we already had an // initial deploy from run, don't switch to this unless it's new + ctx, cancel := context.WithCancel(context.Background()) + watchChan, err := simpleDeploy.WatchDeploy(ctx) + if err != nil { + cancel() + Logf("error starting deploy: %+v", err) + return + } + wg.Add(1) + go func() { + defer wg.Done() + defer cancel() // unblock watch deploy + select { // wait until we're ready to shutdown + case <-exitchan: + } + }() + canceled := false + var last uint64 - startChan := make(chan struct{}) // start signal - close(startChan) // kick it off! for { if obj.NoDeployWatch && (obj.Deploy != nil || last > 0) { // block here, because when we close the @@ -725,29 +805,33 @@ func (obj *Main) Run() error { } select { - case <-startChan: // kick the loop once at start - startChan = nil // disable - - case err, ok := <-etcd.WatchDeploy(embdEtcd): + // WatchDeploy should send an initial event now... + case err, ok := <-watchChan: if !ok { - obj.exit.Done(nil) // regular shutdown + // TODO: is any of this needed in here? + if !canceled { + obj.exit.Done(nil) // regular shutdown + } return } + if err == context.Canceled { + canceled = true + continue // channel close is coming... + } if err != nil { // TODO: it broke, can we restart? - obj.exit.Done(fmt.Errorf("deploy: watch error")) - return + obj.exit.Done(errwrap.Wrapf(err, "deploy: watch error")) + continue } - startChan = nil // disable it early... if obj.Flags.Debug { Logf("deploy: got activity") } - case <-exitchan: - return + //case <-exitchan: + // return // exit via channel close instead } - latest, err := etcd.GetMaxDeployID(embdEtcd) // or zero + latest, err := simpleDeploy.GetMaxDeployID(ctx) // or zero if err != nil { Logf("error getting max deploy id: %+v", err) continue @@ -774,7 +858,7 @@ func (obj *Main) Run() error { // 0 passes through an empty deploy without an error... // (unless there is some sort of etcd error that occurs) - str, err := etcd.GetDeploy(embdEtcd, latest) + str, err := simpleDeploy.GetDeploy(ctx, latest) if err != nil { Logf("deploy: error getting deploy: %+v", err) continue @@ -871,6 +955,9 @@ func (obj *Main) FastExit(err error) { // might leave some of your resources in a partial or unknown state. 
func (obj *Main) Interrupt(err error) { // XXX: implement and run Interrupt API for supported resources - obj.FastExit(err) + + if obj.embdEtcd != nil { + obj.embdEtcd.Interrupt() // unblock borked clusters + } } diff --git a/lib/run.go b/lib/run.go index e30b66bf..cfdcd373 100644 --- a/lib/run.go +++ b/lib/run.go @@ -175,14 +175,19 @@ func run(c *cli.Context, name string, gapiObj gapi.GAPI) error { reterr := obj.Run() if reterr != nil { // log the error message returned - log.Printf("main: Error: %v", reterr) + if obj.Flags.Debug { + log.Printf("main: %+v", reterr) + } } if err := obj.Close(); err != nil { - log.Printf("main: Close: %v", err) + if obj.Flags.Debug { + log.Printf("main: Close: %+v", err) + } if reterr == nil { return err } + reterr = errwrap.Append(reterr, err) } return reterr diff --git a/main.go b/main.go index 7faea99a..cc035df4 100644 --- a/main.go +++ b/main.go @@ -27,7 +27,6 @@ import ( // These constants are some global variables that are used throughout the code. const ( Debug = false // add additional log messages - Trace = false // add execution flow log messages Verbose = false // add extra log message output ) @@ -40,7 +39,6 @@ var ( func main() { flags := mgmt.Flags{ Debug: Debug, - Trace: Trace, Verbose: Verbose, } if err := mgmt.CLI(program, version, flags); err != nil { diff --git a/misc/filter-golang-stack.py b/misc/filter-golang-stack.py index beba8cff..7de6f091 100755 --- a/misc/filter-golang-stack.py +++ b/misc/filter-golang-stack.py @@ -23,17 +23,25 @@ import sys -lines = sys.stdin.readlines() +if len(sys.argv) == 2 and sys.argv[1] != "-": + lines = open(sys.argv[1], "r").readlines() +else: + lines = sys.stdin.readlines() print("read: %d lines" % len(lines)) # find program start +start = -1 for i in range(len(lines)): line = lines[i] if line.startswith("PC="): start=i break +if start == -1: + print("could not find program start, looking for PC=???", file=sys.stderr) + sys.exit(1) + print("starts at line: %d" % (start+1)) # +1 because we're zero based def is_chunk(line): @@ -59,6 +67,18 @@ def filter_chunk(chunk): package_line = lines[1] if package_line.startswith("github.com/purpleidea/mgmt/vendor/"): return False + if package_line.startswith("github.com/") and not package_line.startswith("github.com/purpleidea/mgmt/"): + return False + if package_line.startswith("internal/poll"): + return False + if package_line.startswith("context.propagateCancel"): + return False + if package_line.startswith("runtime.gopark"): + return False + if package_line.startswith("runtime.futex"): + return False + if package_line.startswith("os/signal.signal_recv"): + return False return True diff --git a/test/shell/clustersize.sh b/test/shell/etcd-clustersize.sh similarity index 85% rename from test/shell/clustersize.sh rename to test/shell/etcd-clustersize.sh index 3d7cd50e..fbd613f7 100755 --- a/test/shell/clustersize.sh +++ b/test/shell/etcd-clustersize.sh @@ -10,7 +10,7 @@ if ! command -v etcdctl >/dev/null; then exit 0 fi -mkdir /tmp/mgmt/{A..E} +#mkdir /tmp/mgmt/{A..E} # kill servers on error/exit trap 'pkill -9 mgmt' EXIT @@ -22,7 +22,7 @@ $TIMEOUT "$MGMT" run --hostname h3 --tmp-prefix --no-pgp --seeds http://127.0.0. 
# wait for everything to converge sleep 30s -ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/idealClusterSize 3 +ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 3 $TIMEOUT "$MGMT" run --hostname h4 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 empty & $TIMEOUT "$MGMT" run --hostname h5 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2387 --server-urls http://127.0.0.1:2388 empty & @@ -32,7 +32,7 @@ sleep 30s test "$(ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 member list | wc -l)" -eq 3 -ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 put /_mgmt/idealClusterSize 5 +ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 5 # wait for everything to converge sleep 30s diff --git a/test/shell/etcd-conflicting-server.sh b/test/shell/etcd-conflicting-server.sh new file mode 100755 index 00000000..b11c4194 --- /dev/null +++ b/test/shell/etcd-conflicting-server.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +. "$(dirname "$0")/../util.sh" + +# run empty graphs, we're just testing etcd clustering +$TIMEOUT "$MGMT" run --no-pgp --tmp-prefix empty & +pid1=$! +sleep 15s # let it startup + +# run a second one that should conflict because a server is already running... +$TIMEOUT "$MGMT" run --no-pgp --tmp-prefix empty & +pid2=$! +wait $pid2 +e=$? +if [ $e -eq 0 ]; then + echo "second mgmt exited successfully when error was expected" + exit 1 +fi +if [ $e -ne 1 ]; then + echo "second mgmt exited with unexpected error of $e" + exit $e +fi + +$(kill -SIGINT $pid1)& # send ^C to exit 1st mgmt +wait $pid1 # get exit status +# if pid1 exits because of a timeout, then it blocked, and this is a bug! +exit $? diff --git a/test/shell/etcd-three-hosts-reversed.sh b/test/shell/etcd-three-hosts-reversed.sh new file mode 100755 index 00000000..23fb822e --- /dev/null +++ b/test/shell/etcd-three-hosts-reversed.sh @@ -0,0 +1,35 @@ +#!/bin/bash -e + +. "$(dirname "$0")/../util.sh" + +# run empty graphs, we're just testing etcd clustering +$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix empty & +pid1=$! +sleep 15s # let it startup + +$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix empty & +pid2=$! +sleep 15s + +$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix empty & +pid3=$! +sleep 15s + +$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt (reversed!) +wait $pid1 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt +wait $pid2 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid3)& # send ^C to exit 3rd mgmt (reversed!) +wait $pid3 # get exit status +# if pid3 exits because of a timeout, then it blocked, and this is a bug! +exit $? diff --git a/test/shell/etcd-two-hosts-reversed.sh b/test/shell/etcd-two-hosts-reversed.sh new file mode 100755 index 00000000..3123fb76 --- /dev/null +++ b/test/shell/etcd-two-hosts-reversed.sh @@ -0,0 +1,24 @@ +#!/bin/bash -e + +. "$(dirname "$0")/../util.sh" + +# run empty graphs, we're just testing etcd clustering +$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix empty & +pid1=$! 
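+# save the pid so we can send it SIGINT and check its exit status below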
+sleep 15s # let it startup + +$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix empty & +pid2=$! +sleep 15s + +$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt! (reverse!) +wait $pid1 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt (reverse!) +wait $pid2 # get exit status +# if pid2 exits because of a timeout, then it blocked, and this is a bug! +exit $? diff --git a/test/shell/exchange.sh b/test/shell/exchange.sh index 3ac9a951..c2a48836 100755 --- a/test/shell/exchange.sh +++ b/test/shell/exchange.sh @@ -5,18 +5,58 @@ set -o errexit set -o pipefail -$TIMEOUT "$MGMT" run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang exchange0.mcl & -$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang exchange0.mcl & -$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang exchange0.mcl & -$TIMEOUT "$MGMT" run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang exchange0.mcl & +$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix --no-pgp empty & +pid1=$! +sleep 10s +$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty & +pid2=$! +sleep 10s +$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty & +pid3=$! +sleep 10s +$TIMEOUT "$MGMT" run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty & +pid4=$! +sleep 10s +$TIMEOUT "$MGMT" deploy --no-git --seeds http://127.0.0.1:2379 lang --lang exchange0.mcl # kill servers on error/exit -trap 'pkill -9 mgmt' EXIT +#trap 'pkill -9 mgmt' EXIT # wait for everything to converge -sleep 10s +sleep 15s + +# debug +tail /tmp/mgmt/exchange-* test "$(cat /tmp/mgmt/exchange-* | grep -c h1)" -eq 4 test "$(cat /tmp/mgmt/exchange-* | grep -c h2)" -eq 4 test "$(cat /tmp/mgmt/exchange-* | grep -c h3)" -eq 4 test "$(cat /tmp/mgmt/exchange-* | grep -c h4)" -eq 4 + +$(sleep 15s && kill -SIGINT $pid4)& # send ^C to exit mgmt... +wait $pid4 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid3)& # send ^C to exit mgmt... +wait $pid3 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit mgmt... +wait $pid2 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit mgmt... +wait $pid1 +e=$? 
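+# h1 exits last; a non-zero status here means it didn't shut down cleanly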
+if [ $e -ne 0 ]; then + exit $e +fi diff --git a/test/shell/exchange0.mcl b/test/shell/exchange0.mcl index a6d1f3bf..9e855ce6 100644 --- a/test/shell/exchange0.mcl +++ b/test/shell/exchange0.mcl @@ -1,9 +1,10 @@ # run this example with these commands # watch -n 0.1 'tail *' # run this in /tmp/mgmt/ -# time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl +# time ./mgmt run --hostname h1 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty +# time ./mgmt deploy --no-git --seeds http://127.0.0.1:2379 lang --lang examples/lang/exchange0.mcl import "sys" import "world" diff --git a/test/test-govet.sh b/test/test-govet.sh index cd06d1d1..3f63ec6a 100755 --- a/test/test-govet.sh +++ b/test/test-govet.sh @@ -65,6 +65,9 @@ function consistent-imports() { if grep $'\t"github.com/purpleidea/mgmt/engine/util"' "$1"; then # import as engineUtil return 1 fi + if grep '"golang.org/x/net/context"' "$1"; then # use built-in context + return 1 + fi } # run go vet on a per-package basis diff --git a/util/sync_test.go b/util/sync_test.go index 229a00c6..00ce1f2e 100644 --- a/util/sync_test.go +++ b/util/sync_test.go @@ -86,7 +86,7 @@ func TestEasyAckOnce2(t *testing.T) { } } -func ExampleSubscribeSync() { +func ExampleSubscribedSignal() { fmt.Println("hello") x := &SubscribedSignal{} diff --git a/util/util.go b/util/util.go index e39c9da1..3c7facfd 100644 --- a/util/util.go +++ b/util/util.go @@ -430,6 +430,21 @@ func TimeAfterOrBlockCtx(ctx context.Context, t int) <-chan struct{} { return ch } +// CloseAfter takes a duration, similarly to `time.After`, and returns a channel +// that closes when either the context is done, or the duration expires. +func CloseAfter(ctx context.Context, d time.Duration) <-chan struct{} { + ch := make(chan struct{}) + go func() { + defer close(ch) + select { + case <-time.After(d): + // done + case <-ctx.Done(): + } + }() + return ch +} + // SystemBusPrivateUsable makes using the private bus usable. // TODO: should be upstream: https://github.com/godbus/dbus/issues/15 func SystemBusPrivateUsable() (conn *dbus.Conn, err error) { @@ -468,6 +483,26 @@ func SessionBusPrivateUsable() (conn *dbus.Conn, err error) { return conn, nil // success } +// PriorityStrSliceSort filters any elements matching fn to the end of the list. +// You can reverse the match result with a not to filter to the front instead! +// A copy of the list is returned, the original is not modified. 
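+// For example, with a fn that matches the prefix "bar", the list
+// ["bar1", "foo", "bar2"] sorts to ["foo", "bar1", "bar2"]; the relative
+// order within each group is preserved.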
+func PriorityStrSliceSort(input []string, fn func(string) bool) []string {
+	output := []string{}
+	found := []string{}
+	for _, x := range input {
+		if fn(x) { // if we find the key, don't include it just yet
+			found = append(found, x) // save for later
+			continue
+		}
+		output = append(output, x)
+	}
+
+	// include the keys at the end (if found)
+	output = append(output, found...)
+
+	return output
+}
+
 // SortedStrSliceCompare takes two lists of strings and returns whether or not
 // they are equivalent. It will return nil if both sets contain the same
 // elements, regardless of order, and an error if they do not.
diff --git a/util/util_test.go b/util/util_test.go
index acbbf87c..b80c5f9a 100644
--- a/util/util_test.go
+++ b/util/util_test.go
@@ -22,6 +22,7 @@ package util
 import (
 	"reflect"
 	"sort"
+	"strings"
 	"testing"
 )
 
@@ -1014,6 +1015,76 @@ func TestRemovePathPrefix0(t *testing.T) {
 	}
 }
 
+func TestPriorityStrSliceSort0(t *testing.T) {
+	in := []string{"foo", "bar", "baz"}
+	ex := []string{"bar", "baz", "foo"}
+
+	fn := func(x string) bool {
+		return x == "foo"
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
+func TestPriorityStrSliceSort1(t *testing.T) {
+	in := []string{"foo", "bar", "baz"}
+	ex := []string{"bar", "foo", "baz"}
+
+	fn := func(x string) bool {
+		return x != "bar" // != brings this key to the front
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
+func TestPriorityStrSliceSort2(t *testing.T) {
+	in := []string{"bar", "foo", "bar", "bar", "baz"}
+	ex := []string{"foo", "baz", "bar", "bar", "bar"}
+
+	fn := func(x string) bool {
+		return x == "bar"
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
+func TestPriorityStrSliceSort3(t *testing.T) {
+	in := []string{"foo", "bar1", "bar2", "bar3", "baz"}
+	ex := []string{"bar1", "bar2", "bar3", "foo", "baz"}
+
+	fn := func(x string) bool {
+		return !strings.HasPrefix(x, "bar")
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
+func TestPriorityStrSliceSort4(t *testing.T) {
+	in := []string{"foo", "bar1", "bar2", "bar3", "baz"}
+	ex := []string{"foo", "baz", "bar1", "bar2", "bar3"}
+
+	fn := func(x string) bool {
+		return strings.HasPrefix(x, "bar")
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
 func TestSortedStrSliceCompare0(t *testing.T) {
 	slice0 := []string{"foo", "bar", "baz"}
 	slice1 := []string{"bar", "foo", "baz"}
diff --git a/vendor/github.com/coreos/etcd b/vendor/github.com/coreos/etcd
index 27fc7e22..d57e8b8d 160000
--- a/vendor/github.com/coreos/etcd
+++ b/vendor/github.com/coreos/etcd
@@ -1 +1 @@
-Subproject commit 27fc7e2296f506182f58ce846e48f36b34fe6842
+Subproject commit d57e8b8d97adfc4a6c224fe116714bf1a1f3beb9
diff --git a/yamlgraph/gapi.go b/yamlgraph/gapi.go
index d433f5fe..cc065eff 100644
--- a/yamlgraph/gapi.go
+++ b/yamlgraph/gapi.go
@@ -18,6 +18,7 @@
 package yamlgraph
 
 import (
+	"context"
 	"fmt"
 	"sync"
 
@@ -166,6 +167,10 @@ func (obj *GAPI) Next() chan gapi.Next {
 			ch <- next
 			return
 		}
+		// FIXME: add timeout to context
+		ctx, cancel := context.WithCancel(context.Background())
+		defer cancel()
+
 		startChan := make(chan struct{}) // start signal
 		close(startChan)                 // kick it off!
 
@@ -173,7 +178,16 @@ func (obj *GAPI) Next() chan gapi.Next {
 		if obj.data.NoStreamWatch {
 			watchChan = nil
 		} else {
-			watchChan = obj.data.World.ResWatch()
+			var err error
+			watchChan, err = obj.data.World.ResWatch(ctx)
+			if err != nil {
+				next := gapi.Next{
+					Err:  errwrap.Wrapf(err, "%s: could not start watch", Name),
+					Exit: true, // exit, b/c programming error?
+				}
+				ch <- next
+				return
+			}
 		}
 
 		for {
diff --git a/yamlgraph/gconfig.go b/yamlgraph/gconfig.go
index d668fe5e..4f4d380f 100644
--- a/yamlgraph/gconfig.go
+++ b/yamlgraph/gconfig.go
@@ -19,6 +19,7 @@
 package yamlgraph
 
 import (
+	"context"
 	"fmt"
 	"strings"
 
@@ -168,6 +169,7 @@ func (obj *GraphConfig) Parse(data []byte) error {
 
 // NewGraphFromConfig transforms a GraphConfig struct into a new graph.
 // FIXME: remove any possibly left over, now obsolete graph diff code from here!
+// TODO: add a timeout to replace context.TODO()
 func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World, noop bool) (*pgraph.Graph, error) {
 	// hostname is the uuid for the host
 
@@ -224,7 +226,7 @@ func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World,
 	}
 
 	// store in backend (usually etcd)
-	if err := world.ResExport(resourceList); err != nil {
+	if err := world.ResExport(context.TODO(), resourceList); err != nil {
 		return nil, fmt.Errorf("Config: Could not export resources: %v", err)
 	}
 
@@ -239,7 +241,7 @@ func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World,
 	// database changes, we don't have a partial state of affairs...
 	if len(kindFilter) > 0 { // if kindFilter is empty, don't need to do lookups!
 		var err error
-		resourceList, err = world.ResCollect(hostnameFilter, kindFilter)
+		resourceList, err = world.ResCollect(context.TODO(), hostnameFilter, kindFilter)
 		if err != nil {
 			return nil, fmt.Errorf("Config: Could not collect resources: %v", err)
 		}