diff --git a/docs/faq.md b/docs/faq.md
index 3f771ae9..deda3840 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -215,23 +215,25 @@ requires a number of seconds as an argument.
 ./mgmt run lang --lang examples/lang/hello0.mcl --converged-timeout=5
 ```
 
-### What does the error message about an inconsistent dataDir mean?
+### On startup, `mgmt` hangs after: `etcd: server: starting...`.
 
 If you get an error message similar to:
 
 ```
-Etcd: Connect: CtxError...
-Etcd: CtxError: Reason: CtxDelayErr(5s): No endpoints available yet!
-Etcd: Connect: Endpoints: []
-Etcd: The dataDir (/var/lib/mgmt/etcd) might be inconsistent or corrupt.
+etcd: server: starting...
+etcd: server: start timeout of 1m0s reached
+etcd: server: close timeout of 15s reached
 ```
 
-This happens when there are a series of fatal connect errors in a row. This can
-happen when you start `mgmt` using a dataDir that doesn't correspond to the
-current cluster view. As a result, the embedded etcd server never finishes
-starting up, and as a result, a default endpoint never gets added. The solution
-is to either reconcile the mistake, and if there is no important data saved, you
-can remove the etcd dataDir. This is typically `/var/lib/mgmt/etcd/member/`.
+and nothing happens afterwards, then this can be due to a corrupt etcd storage
+directory. Each etcd server embedded in mgmt needs a dedicated directory where
+it stores its local state. This directory must never be shared by more than one
+member, and is typically `/var/lib/mgmt/etcd/member/`. If you accidentally
+re-use it (for example during testing) with a different cluster view, such as
+with more than one different hostname, then you can corrupt it.
+
+The solution is to avoid making this mistake in the first place. If there is no
+important data saved, you can remove the etcd member dir and start over.
 
 ### On running `make` to build a new version, it errors with: `Text file busy`.
 
diff --git a/engine/graph/engine.go b/engine/graph/engine.go
index 11738792..c37a1b31 100644
--- a/engine/graph/engine.go
+++ b/engine/graph/engine.go
@@ -62,6 +62,13 @@ type Engine struct {
 // If the struct does not validate, or it cannot initialize, then this errors.
 // Initially it will contain an empty graph.
 func (obj *Engine) Init() error {
+	if obj.Program == "" {
+		return fmt.Errorf("the Program is empty")
+	}
+	if obj.Hostname == "" {
+		return fmt.Errorf("the Hostname is empty")
+	}
+
 	var err error
 	if obj.graph, err = pgraph.NewGraph("graph"); err != nil {
 		return err
diff --git a/engine/resources/config_etcd.go b/engine/resources/config_etcd.go
new file mode 100644
index 00000000..77c6dfe7
--- /dev/null
+++ b/engine/resources/config_etcd.go
@@ -0,0 +1,250 @@
+// Mgmt
+// Copyright (C) 2013-2019+ James Shubin and the project contributors
+// Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
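+
+// An example usage of this resource from mcl might look something like the
+// following sketch. The param names come from the lang struct tags below, but
+// the exact surrounding mcl syntax shown here is illustrative only:
+//
+//	config:etcd "etcd" {
+//		idealclustersize => 5,
+//		allow_size_shutdown => false,
+//	}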
+
+package resources
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/purpleidea/mgmt/engine"
+	"github.com/purpleidea/mgmt/engine/traits"
+	"github.com/purpleidea/mgmt/util"
+	"github.com/purpleidea/mgmt/util/errwrap"
+)
+
+func init() {
+	engine.RegisterResource("config:etcd", func() engine.Res { return &ConfigEtcdRes{} })
+}
+
+const (
+	sizeCheckApplyTimeout = 5 * time.Second
+)
+
+// ConfigEtcdRes is a resource that sets mgmt's etcd configuration.
+type ConfigEtcdRes struct {
+	traits.Base // add the base methods without re-implementation
+
+	init *engine.Init
+
+	// IdealClusterSize is the requested minimum size of the cluster. If you
+	// set this to zero, it will cause a cluster wide shutdown if
+	// AllowSizeShutdown is true. If it's not true, then it will cause a
+	// validation error.
+	IdealClusterSize uint16 `lang:"idealclustersize"`
+	// AllowSizeShutdown is a required safety flag that you must set to true
+	// if you want to allow causing a cluster shutdown by setting
+	// IdealClusterSize to zero.
+	AllowSizeShutdown bool `lang:"allow_size_shutdown"`
+
+	// sizeFlag determines whether sizeCheckApply already ran or not.
+	sizeFlag bool
+
+	interruptChan chan struct{}
+	wg            *sync.WaitGroup
+}
+
+// Default returns some sensible defaults for this resource.
+func (obj *ConfigEtcdRes) Default() engine.Res {
+	return &ConfigEtcdRes{}
+}
+
+// Validate if the params passed in are valid data.
+func (obj *ConfigEtcdRes) Validate() error {
+	// NOTE: IdealClusterSize is a uint16, so it can never be negative.
+	if obj.IdealClusterSize == 0 && !obj.AllowSizeShutdown {
+		return fmt.Errorf("the IdealClusterSize can't be zero if AllowSizeShutdown is false")
+	}
+
+	return nil
+}
+
+// Init runs some startup code for this resource.
+func (obj *ConfigEtcdRes) Init(init *engine.Init) error {
+	obj.init = init // save for later
+
+	obj.interruptChan = make(chan struct{})
+	obj.wg = &sync.WaitGroup{}
+
+	return nil
+}
+
+// Close is run by the engine to clean up after the resource is done.
+func (obj *ConfigEtcdRes) Close() error {
+	obj.wg.Wait() // bonus
+	return nil
+}
+
+// Watch is the primary listener for this resource and it outputs events.
+func (obj *ConfigEtcdRes) Watch() error {
+	obj.wg.Add(1)
+	defer obj.wg.Done()
+	// FIXME: add timeout to context
+	// The obj.init.Done channel is closed by the engine to signal shutdown.
+	ctx, cancel := util.ContextWithCloser(context.Background(), obj.init.Done)
+	defer cancel()
+	ch, err := obj.init.World.IdealClusterSizeWatch(util.CtxWithWg(ctx, obj.wg))
+	if err != nil {
+		return errwrap.Wrapf(err, "could not watch ideal cluster size")
+	}
+
+	obj.init.Running() // when started, notify engine that we're running
+
+Loop:
+	for {
+		select {
+		case event, ok := <-ch:
+			if !ok {
+				break Loop
+			}
+			if obj.init.Debug {
+				obj.init.Logf("event: %+v", event)
+			}
+			// pass through and send an event
+
+		case <-obj.init.Done: // closed by the engine to signal shutdown
+			return nil
+		}
+
+		obj.init.Event() // notify engine of an event (this can block)
+	}
+
+	return nil
+}
+
+// sizeCheckApply sets the IdealClusterSize parameter. If it sees a value change
+// to zero, then it *won't* try and change it away from zero, because it assumes
+// that someone has requested a shutdown. If a zero value is seen on first
+// startup, then it will change it, because it might be left over from the
+// previous cluster.
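+// For example (an illustrative timeline, not taken from a real run): if the
+// param is five and we read back zero on the very first check, we overwrite it
+// with five, but if we read back zero on a *later* check, we leave it alone and
+// report converged, since a zero that appears after startup means that a
+// cluster shutdown was requested.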
+func (obj *ConfigEtcdRes) sizeCheckApply(apply bool) (bool, error) {
+	wg := &sync.WaitGroup{}
+	defer wg.Wait() // this must be above the defer cancel() call
+	ctx, cancel := context.WithTimeout(context.Background(), sizeCheckApplyTimeout)
+	defer cancel()
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		select {
+		case <-obj.interruptChan:
+			cancel()
+		case <-ctx.Done():
+			// let this exit
+		}
+	}()
+
+	val, err := obj.init.World.IdealClusterSizeGet(ctx)
+	if err != nil {
+		return false, errwrap.Wrapf(err, "could not get ideal cluster size")
+	}
+
+	// if we got a value of zero, and we've already run before, then it's ok
+	if obj.IdealClusterSize != 0 && val == 0 && obj.sizeFlag {
+		obj.init.Logf("impending cluster shutdown, not setting ideal cluster size")
+		return true, nil // impending shutdown, don't try and cancel it.
+	}
+	obj.sizeFlag = true
+
+	// must be done after setting the above flag
+	if obj.IdealClusterSize == val { // state is correct
+		return true, nil
+	}
+
+	if !apply {
+		return false, nil
+	}
+
+	// set!
+	// This is run as a transaction so we detect if we needed to change it.
+	changed, err := obj.init.World.IdealClusterSizeSet(ctx, obj.IdealClusterSize)
+	if err != nil {
+		return false, errwrap.Wrapf(err, "could not set ideal cluster size")
+	}
+	if !changed {
+		return true, nil // we lost a race, which means no change needed
+	}
+	obj.init.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
+
+	return false, nil
+}
+
+// CheckApply method for this etcd config resource. It runs the individual
+// check and apply steps for each of the settings that this resource manages.
+func (obj *ConfigEtcdRes) CheckApply(apply bool) (bool, error) {
+	checkOK := true
+
+	if c, err := obj.sizeCheckApply(apply); err != nil {
+		return false, err
+	} else if !c {
+		checkOK = false
+	}
+
+	// TODO: add more config settings management here...
+	//if c, err := obj.TODOCheckApply(apply); err != nil {
+	//	return false, err
+	//} else if !c {
+	//	checkOK = false
+	//}
+
+	return checkOK, nil // w00t
+}
+
+// Cmp compares two resources and returns an error if they are not equivalent.
+func (obj *ConfigEtcdRes) Cmp(r engine.Res) error {
+	// we can only compare ConfigEtcdRes to others of the same resource kind
+	res, ok := r.(*ConfigEtcdRes)
+	if !ok {
+		return fmt.Errorf("not a %s", obj.Kind())
+	}
+
+	if obj.IdealClusterSize != res.IdealClusterSize {
+		return fmt.Errorf("the IdealClusterSize param differs")
+	}
+	if obj.AllowSizeShutdown != res.AllowSizeShutdown {
+		return fmt.Errorf("the AllowSizeShutdown param differs")
+	}
+
+	return nil
+}
+
+// Interrupt is called to ask the execution of this resource to end early.
+func (obj *ConfigEtcdRes) Interrupt() error {
+	close(obj.interruptChan)
+	return nil
+}
+
+// UnmarshalYAML is the custom unmarshal handler for this struct.
+// It is primarily useful for setting the defaults.
+func (obj *ConfigEtcdRes) UnmarshalYAML(unmarshal func(interface{}) error) error {
+	type rawRes ConfigEtcdRes // indirection to avoid infinite recursion
+
+	def := obj.Default()            // get the default
+	res, ok := def.(*ConfigEtcdRes) // put in the right format
+	if !ok {
+		return fmt.Errorf("could not convert to ConfigEtcdRes")
+	}
+	raw := rawRes(*res) // convert; the defaults go here
+
+	if err := unmarshal(&raw); err != nil {
+		return err
+	}
+
+	*obj = ConfigEtcdRes(raw) // restore from indirection with type conversion!
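+	// obj now contains the defaults overlaid with any explicitly set fields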
+ return nil +} diff --git a/engine/resources/kv.go b/engine/resources/kv.go index 698d69ac..9926194f 100644 --- a/engine/resources/kv.go +++ b/engine/resources/kv.go @@ -18,11 +18,15 @@ package resources import ( + "context" "fmt" "strconv" + "sync" + "time" "github.com/purpleidea/mgmt/engine" "github.com/purpleidea/mgmt/engine/traits" + "github.com/purpleidea/mgmt/util" "github.com/purpleidea/mgmt/util/errwrap" ) @@ -39,6 +43,10 @@ const ( SkipCmpStyleString ) +const ( + kvCheckApplyTimeout = 5 * time.Second +) + // KVRes is a resource which writes a key/value pair into cluster wide storage. // It will ensure that the key is set to the requested value. The one exception // is that if you use the SkipLessThan parameter, then it will only replace the @@ -67,6 +75,8 @@ type KVRes struct { // the value is greater when using the SkipLessThan parameter. SkipCmpStyle KVResSkipCmpStyle `lang:"skipcmpstyle" yaml:"skipcmpstyle"` + interruptChan chan struct{} + // TODO: does it make sense to have different backends here? (eg: local) } @@ -107,6 +117,8 @@ func (obj *KVRes) Validate() error { func (obj *KVRes) Init(init *engine.Init) error { obj.init = init // save for later + obj.interruptChan = make(chan struct{}) + return nil } @@ -117,9 +129,17 @@ func (obj *KVRes) Close() error { // Watch is the primary listener for this resource and it outputs events. func (obj *KVRes) Watch() error { - obj.init.Running() // when started, notify engine that we're running + // FIXME: add timeout to context + // The obj.init.Done channel is closed by the engine to signal shutdown. + ctx, cancel := util.ContextWithCloser(context.Background(), obj.init.Done) + defer cancel() - ch := obj.init.World.StrMapWatch(obj.getKey()) // get possible events! + ch, err := obj.init.World.StrMapWatch(ctx, obj.getKey()) // get possible events! + if err != nil { + return err + } + + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -191,13 +211,28 @@ func (obj *KVRes) lessThanCheck(value string) (bool, error) { func (obj *KVRes) CheckApply(apply bool) (bool, error) { obj.init.Logf("CheckApply(%t)", apply) + wg := &sync.WaitGroup{} + defer wg.Wait() // this must be above the defer cancel() call + ctx, cancel := context.WithTimeout(context.Background(), kvCheckApplyTimeout) + defer cancel() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.interruptChan: + cancel() + case <-ctx.Done(): + // let this exit + } + }() + if val, exists := obj.init.Recv()["Value"]; exists && val.Changed { // if we received on Value, and it changed, wooo, nothing to do. obj.init.Logf("CheckApply: `Value` was updated!") } hostname := obj.init.Hostname // me - keyMap, err := obj.init.World.StrMapGet(obj.getKey()) + keyMap, err := obj.init.World.StrMapGet(ctx, obj.getKey()) if err != nil { return false, errwrap.Wrapf(err, "check error during StrGet") } @@ -217,7 +252,7 @@ func (obj *KVRes) CheckApply(apply bool) (bool, error) { return true, nil // nothing to delete, we're good! 
} else if ok && obj.Value == nil { // delete - err := obj.init.World.StrMapDel(obj.getKey()) + err := obj.init.World.StrMapDel(ctx, obj.getKey()) return false, errwrap.Wrapf(err, "apply error during StrDel") } @@ -225,7 +260,7 @@ func (obj *KVRes) CheckApply(apply bool) (bool, error) { return false, nil } - if err := obj.init.World.StrMapSet(obj.getKey(), *obj.Value); err != nil { + if err := obj.init.World.StrMapSet(ctx, obj.getKey(), *obj.Value); err != nil { return false, errwrap.Wrapf(err, "apply error during StrSet") } @@ -261,6 +296,12 @@ func (obj *KVRes) Cmp(r engine.Res) error { return nil } +// Interrupt is called to ask the execution of this resource to end early. +func (obj *KVRes) Interrupt() error { + close(obj.interruptChan) + return nil +} + // KVUID is the UID struct for KVRes. type KVUID struct { engine.BaseUID diff --git a/engine/world.go b/engine/world.go index 50fb1431..654c3044 100644 --- a/engine/world.go +++ b/engine/world.go @@ -18,6 +18,8 @@ package engine import ( + "context" + "github.com/purpleidea/mgmt/etcd/scheduler" ) @@ -25,22 +27,26 @@ import ( // the GAPI to store state and exchange information throughout the cluster. It // is the interface each machine uses to communicate with the rest of the world. type World interface { // TODO: is there a better name for this interface? - ResWatch() chan error - ResExport([]Res) error + ResWatch(context.Context) (chan error, error) + ResExport(context.Context, []Res) error // FIXME: should this method take a "filter" data struct instead of many args? - ResCollect(hostnameFilter, kindFilter []string) ([]Res, error) + ResCollect(ctx context.Context, hostnameFilter, kindFilter []string) ([]Res, error) - StrWatch(namespace string) chan error + IdealClusterSizeWatch(context.Context) (chan error, error) + IdealClusterSizeGet(context.Context) (uint16, error) + IdealClusterSizeSet(context.Context, uint16) (bool, error) + + StrWatch(ctx context.Context, namespace string) (chan error, error) StrIsNotExist(error) bool - StrGet(namespace string) (string, error) - StrSet(namespace, value string) error - StrDel(namespace string) error + StrGet(ctx context.Context, namespace string) (string, error) + StrSet(ctx context.Context, namespace, value string) error + StrDel(ctx context.Context, namespace string) error // XXX: add the exchange primitives in here directly? - StrMapWatch(namespace string) chan error - StrMapGet(namespace string) (map[string]string, error) - StrMapSet(namespace, value string) error - StrMapDel(namespace string) error + StrMapWatch(ctx context.Context, namespace string) (chan error, error) + StrMapGet(ctx context.Context, namespace string) (map[string]string, error) + StrMapSet(ctx context.Context, namespace, value string) error + StrMapDel(ctx context.Context, namespace string) error Scheduler(namespace string, opts ...scheduler.Option) (*scheduler.Result, error) diff --git a/etcd/callback.go b/etcd/callback.go new file mode 100644 index 00000000..2559772f --- /dev/null +++ b/etcd/callback.go @@ -0,0 +1,497 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "context" + "fmt" + "sync" + + "github.com/purpleidea/mgmt/etcd/interfaces" + "github.com/purpleidea/mgmt/util" + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" // "clientv3" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" +) + +// nominateApply applies the changed watcher data onto our local caches. +func (obj *EmbdEtcd) nominateApply(data *interfaces.WatcherData) error { + if data == nil { // ignore empty data + return nil + } + + // If we tried to lookup the nominated members here (in etcd v3) this + // would sometimes block because we would lose the cluster leader once + // the current leader calls the MemberAdd API and it steps down trying + // to form a two host cluster. Instead, we can look at the event + // response data to read the nominated values! Since we only see what + // has *changed* in the response data, we have to keep track of the + // original state and apply the deltas. This must be idempotent in case + // it errors and is called again. If we're retrying and we get a data + // format error, it's probably not the end of the world. + nominated, err := applyDeltaEvents(data, obj.nominated) // map[hostname]URLs (URLsMap) + if err != nil && err != errInconsistentApply { // allow missing deletes + return err // unexpected error, fail + } + // TODO: do we want to sort this if it becomes a list instead of a map? + //sort.Strings(nominated) // deterministic order + obj.nominated = nominated + return nil +} + +// volunteerApply applies the changed watcher data onto our local caches. +func (obj *EmbdEtcd) volunteerApply(data *interfaces.WatcherData) error { + if data == nil { // ignore empty data + return nil + } + volunteers, err := applyDeltaEvents(data, obj.volunteers) // map[hostname]URLs (URLsMap) + if err != nil && err != errInconsistentApply { // allow missing deletes + return err // unexpected error, fail + } + // TODO: do we want to sort this if it becomes a list instead of a map? + //sort.Strings(volunteers) // deterministic order + obj.volunteers = volunteers + return nil +} + +// endpointApply applies the changed watcher data onto our local caches. In this +// particular apply function, it also sets our client with the new endpoints. +func (obj *EmbdEtcd) endpointApply(data *interfaces.WatcherData) error { + if data == nil { // ignore empty data + return nil + } + endpoints, err := applyDeltaEvents(data, obj.endpoints) // map[hostname]URLs (URLsMap) + if err != nil && err != errInconsistentApply { // allow missing deletes + return err // unexpected error, fail + } + + // is the endpoint list different? + if err := cmpURLsMap(obj.endpoints, endpoints); err != nil { + obj.endpoints = endpoints // set + // can happen if a server drops out for example + obj.Logf("endpoint list changed to: %+v", endpoints) + obj.setEndpoints() + } + return nil +} + +// nominateCb runs to respond to the nomination list change events. +// Functionally, it controls the starting and stopping of the server process. 
If +// a nominate message is received for this machine, then it means it is already +// being added to the cluster with member add and the cluster is now waiting for +// it to start up. When a nominate entry is removed, it's up to this function to +// run the member remove right before it shuts its server down. +func (obj *EmbdEtcd) nominateCb(ctx context.Context) error { + // Ensure that only one copy of this function is run simultaneously. + // This is because we don't want to cause runServer to race with + // destroyServer. Let us completely start up before we can cancel it. As + // a special case, destroyServer itself can race against itself. I don't + // think it's possible for contention on this mutex, but we'll leave it + // in for safety. + obj.nominatedMutex.Lock() + defer obj.nominatedMutex.Unlock() + // This ordering mutex is being added for safety, since there is no good + // reason for this function and volunteerCb to run simultaneously, and + // it might be preventing a race condition that was happening. + obj.orderingMutex.Lock() + defer obj.orderingMutex.Unlock() + if obj.Debug { + obj.Logf("nominateCb") + defer obj.Logf("nominateCb: done!") + } + + // check if i have actually volunteered first of all... + if obj.NoServer || len(obj.ServerURLs) == 0 { + obj.Logf("inappropriately nominated, rogue or stale server?") + // TODO: should we un-nominate ourself? + return nil // we've done our job successfully + } + + // This can happen when we're shutting down, build the nominated value. + if len(obj.nominated) == 0 { + obj.Logf("list of nominations is empty") + //return nil // don't exit, we might want to shutdown the server + } else { + obj.Logf("nominated: %v", obj.nominated) + } + + // if there are no other peers, we create a new server + // TODO: do we need an || len(obj.nominated) == 0 if we're the first? + _, exists := obj.nominated[obj.Hostname] // am i nominated? + newCluster := len(obj.nominated) == 1 && exists + if obj.Debug { + obj.Logf("nominateCb: newCluster: %t; exists: %t; obj.server == nil: %t", newCluster, exists, obj.server == nil) + } + + // TODO: server start retries should be handled inside of runServer... + if obj.serverAction(serverActionStart) { // start + // no server is running, but it should be + wg := &sync.WaitGroup{} + serverReady, ackReady := obj.ServerReady() // must call ack! + serverExited, ackExited := obj.ServerExited() // must call ack! 
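+		// NOTE: both ack callbacks below must each be called exactly
+		// once, no matter which of the two select branches fires first.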
+ + var sendError = false + var serverErr error + obj.Logf("waiting for server...") + nominated, err := copyURLsMap(obj.nominated) + if err != nil { + return err + } + wg.Add(1) + go func() { + defer wg.Done() + obj.errExitN = make(chan struct{}) + defer close(obj.errExitN) // multi-signal for errChan close op + // blocks until server exits + serverErr = obj.runServer(newCluster, nominated) + // in case this exits on its own instead of with destroy + defer obj.destroyServer() // run to reset some values + if sendError && serverErr != nil { // exited with an error + select { + case obj.errChan <- errwrap.Wrapf(serverErr, "runServer errored"): + } + } + }() + + // block until either server is ready or an early exit occurs + select { + case <-serverReady: + // detach from our local return of errors from an early + // server exit (pre server ready) and switch to channel + sendError = true // gets set before the ackReady() does + ackReady() // must be called + ackExited() // must be called + // pass + + case <-serverExited: + ackExited() // must be called + ackReady() // must be called + + wg.Wait() // wait for server to finish to get early err + return serverErr + } + + // Once the server is online, we *must* publish this information + // so that (1) others know where to connect to us (2) we provide + // an "event" for member add since there is not any event that's + // currently built-in to etcd and (3) so we have a key to expire + // when we shutdown or crash to give us the member remove event. + // please see issue: https://github.com/coreos/etcd/issues/5277 + + } else if obj.serverAction(serverActionStop) { // stop? + // server is running, but it should not be + + // i have been un-nominated, remove self and shutdown server! + // we don't need to do a member remove if i'm the last one... + if len(obj.nominated) != 0 { // don't call if nobody left but me! + // work around: https://github.com/coreos/etcd/issues/5482 + // and it might make sense to avoid it if we're the last + obj.Logf("member remove: removing self: %d", obj.serverID) + resp, err := obj.memberRemove(ctx, obj.serverID) + if err != nil { + if obj.Debug { + obj.Logf("error with member remove: %v", err) + } + return errwrap.Wrapf(err, "member remove error") + } + if resp != nil { + obj.Logf("member removed (self): %s (%d)", obj.Hostname, obj.serverID) + if err := obj.updateMemberState(resp.Members); err != nil { + return err + } + } + } + + // FIXME: if we fail on destroy should we try to run some of the + // other cleanup tasks that usually afterwards (below) anyways ? + if err := obj.destroyServer(); err != nil { // sync until exited + return errwrap.Wrapf(err, "destroyServer errored") + } + + // We close with this special sentinel only during destroy/exit. + if obj.closing { + return interfaces.ErrShutdown + } + } + + return nil +} + +// volunteerCb runs to respond to the volunteer list change events. +// Functionally, it controls the nominating and adding of members. It typically +// nominates a peer so that it knows it will get to be a server, which causes it +// to start up its server. It also runs the member add operation so that the +// cluster gets quorum safely. The member remove operation is typically run in +// the nominateCb of that server when it is asked to shutdown. This occurs when +// the nominate entry for that server is removed. If a server removes its +// volunteer entry we must respond by removing the nomination so that it can +// receive that message and shutdown. 
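+// In short, the complete lifecycle is: volunteer, then (on the leader)
+// nominate and member add, then server startup; and for shutdown: unvolunteer,
+// then (on the leader) unnominate, then member remove, run by the member
+// itself right before it stops its own server.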
+// FIXME: we might need to respond to member change/disconnect/shutdown events,
+// see: https://github.com/coreos/etcd/issues/5277
+// XXX: Don't allow this function to partially run if it is canceled part way
+// through... We don't want an inconsistent state where we did unnominate, but
+// didn't remove a member...
+// XXX: If the leader changes, do we need to kick the volunteerCb or anything
+// else that might have required a leader and which returned because it did not
+// have one, thus losing an event?
+func (obj *EmbdEtcd) volunteerCb(ctx context.Context) error {
+	// Ensure that only one copy of this function is run simultaneously.
+	// It's not entirely clear if this can ever happen or if it's needed,
+	// but it's an inexpensive safety check that we can add in for now.
+	obj.volunteerMutex.Lock()
+	defer obj.volunteerMutex.Unlock()
+	// This ordering mutex is being added for safety, since there is no good
+	// reason for this function and nominateCb to run simultaneously, and it
+	// might be preventing a race condition that was happening.
+	obj.orderingMutex.Lock()
+	defer obj.orderingMutex.Unlock()
+	if obj.Debug {
+		obj.Logf("volunteerCb")
+		defer obj.Logf("volunteerCb: done!")
+	}
+
+	// FIXME: are there any situations where we don't want to short circuit
+	// here, such as if i'm the last node?
+	if obj.server == nil {
+		if obj.Debug {
+			obj.Logf("i'm not a server yet...")
+		}
+		return nil // if i'm not a server, i'm not a leader, return
+	}
+
+	// FIXME: Instead of checking this, assume yes, and use the
+	// `WithRequireLeader` wrapper, and just ignore the error from that if
+	// it's wrong... Combined with events that poke this volunteerCb when
+	// the leader changes, we shouldn't miss any events...
+	if isLeader, err := obj.isLeader(ctx); err != nil { // XXX: race!
+		return errwrap.Wrapf(err, "error determining leader")
+	} else if !isLeader {
+		if obj.Debug {
+			obj.Logf("we are not the leader...")
+		}
+		return nil
+	}
+	// i am the leader!
+
+	// Remember that the member* operations return the membership, so this
+	// means we don't need to run an extra memberList in those scenarios...
+	// However, this can get out of sync easily, so ensure that our member
+	// information is very recent.
+	if err := obj.memberStateFromList(ctx); err != nil {
+		return errwrap.Wrapf(err, "error during state sync")
+	}
+	// XXX: If we have any unstarted members here, do we want to reschedule
+	// this volunteerCb in a moment? Or will we get another event anyways?
+
+	// NOTE: There used to be an is_leader check right here...
+	// FIXME: Should we use WithRequireLeader instead? Here? Elsewhere?
+	// https://godoc.org/github.com/coreos/etcd/clientv3#WithRequireLeader
+
+	// FIXME: can this happen, and if so, is it an error or a pass-through?
+	if len(obj.volunteers) == 0 {
+		obj.Logf("list of volunteers is empty")
+		//return fmt.Errorf("volunteer list is empty")
+	} else {
+		obj.Logf("volunteers: %+v", obj.volunteers)
+	}
+
+	// TODO: do we really need to check these errors?
+	m, err := copyURLsMap(obj.membermap) // list of members...
+	if err != nil {
+		return err
+	}
+	v, err := copyURLsMap(obj.volunteers)
+	if err != nil {
+		return err
+	}
+	// Unnominate anyone that unvolunteers, so they can shutdown cleanly...
+	// FIXME: one step at a time... do we trigger subsequent steps somehow?
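+	// The chooser sees the current members and the current volunteers, and
+	// it decides who we should nominate and who we should unnominate next.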
+	obj.Logf("chooser: (%+v)/(%+v)", m, v)
+	nominate, unnominate, err := obj.Chooser.Choose(m, v)
+	if err != nil {
+		return errwrap.Wrapf(err, "chooser error")
+	}
+
+	// Ensure that we are the *last* in the list if we're unnominating, and
+	// the *first* in the list if we're nominating. This way, we self-remove
+	// last, and we self-add first. This is least likely to hurt quorum.
+	headFn := func(x string) bool {
+		return x != obj.Hostname
+	}
+	tailFn := func(x string) bool {
+		return x == obj.Hostname
+	}
+	nominate = util.PriorityStrSliceSort(nominate, headFn)
+	unnominate = util.PriorityStrSliceSort(unnominate, tailFn)
+	obj.Logf("chooser result(+/-): %+v/%+v", nominate, unnominate)
+	var reterr error
+	leaderCtx := ctx // default ctx to use
+	if RequireLeaderCtx {
+		leaderCtx = etcd.WithRequireLeader(ctx) // FIXME: Is this correct?
+	}
+
+	for i := range nominate {
+		member := nominate[i]
+		peerURLs, exists := obj.volunteers[member] // comma separated list of urls
+		if !exists {
+			// if this happens, do we have an update race?
+			return fmt.Errorf("could not find member `%s` in volunteers map", member)
+		}
+
+		// NOTE: storing peerURLs when they're already in volunteers/ is
+		// redundant, but it seems to be necessary for a sane algorithm.
+		// nominate before we call the API so that members see it first!
+		if err := obj.nominate(leaderCtx, member, peerURLs); err != nil {
+			return errwrap.Wrapf(err, "error nominating: %s", member)
+		}
+		// XXX: can we add a ttl here, because once we nominate someone,
+		// we need to give them up to N seconds to start up after we run
+		// the MemberAdd API because if they don't, in some situations
+		// such as if we're adding the second node to the cluster, then
+		// we've lost quorum until a second member joins! If the TTL
+		// expires, we need to MemberRemove! In this special case, we
+		// need to forcefully remove the second member if we don't add
+		// them, because we'll be in a lack of quorum state and unable
+		// to do anything... As a result, we should always only add ONE
+		// member at a time!
+
+		// XXX: After we memberAdd, can we wait a timeout, and then undo
+		// the add if the member doesn't come up? We'd also need to run
+		// an unnominate too, and mark the node as temporarily failed...
+		obj.Logf("member add: %s: %v", member, peerURLs)
+		resp, err := obj.memberAdd(leaderCtx, peerURLs)
+		if err != nil {
+			// FIXME: On error this function needs to run again,
+			// because we need to make sure to add the member here!
+			return errwrap.Wrapf(err, "member add error")
+		}
+		if resp != nil { // if we're already the right state, we get nil
+			obj.Logf("member added: %s (%d): %v", member, resp.Member.ID, peerURLs)
+			if err := obj.updateMemberState(resp.Members); err != nil {
+				return err
+			}
+			if resp.Member.Name == "" { // not started instantly ;)
+				obj.addMemberState(member, resp.Member.ID, peerURLs, nil)
+			}
+			// TODO: would this ever happen or be necessary?
+			//if member == obj.Hostname {
+			//	obj.addSelfState()
+			//}
+		}
+	}
+
+	// we must remove them from the members API or it will look like a crash
+	if l := len(unnominate); l > 0 {
+		obj.Logf("unnominated: shutting down %d members...", l)
+	}
+	for i := range unnominate {
+		member := unnominate[i]
+		memberID, exists := obj.memberIDs[member] // map[string]uint64
+		if !exists {
+			// if this happens, do we have an update race?
+			return fmt.Errorf("could not find member `%s` in memberIDs map", member)
+		}
+
+		// start a watcher to know if the member was removed
+		cancelCtx, cancel := context.WithCancel(leaderCtx)
+		defer cancel()
+		timeout := util.CloseAfter(cancelCtx, SelfRemoveTimeout) // chan closes
+		fn := func(members []*pb.Member) error {
+			for _, m := range members {
+				if m.Name == member || m.ID == memberID {
+					return fmt.Errorf("still present")
+				}
+			}
+
+			return nil // not found!
+		}
+		ch, err := obj.memberChange(cancelCtx, fn, MemberChangeInterval)
+		if err != nil {
+			return errwrap.Wrapf(err, "error watching for change of: %s", member)
+		}
+		if err := obj.nominate(leaderCtx, member, nil); err != nil { // unnominate
+			return errwrap.Wrapf(err, "error unnominating: %s", member)
+		}
+		// Once we issue the above unnominate, that peer will
+		// shutdown, and this might cause us to lose quorum,
+		// therefore, let that member remove itself, and then
+		// double check that it did happen in case delinquent.
+		// TODO: get built-in transactional member Add/Remove
+		// functionality to avoid a separate nominate list...
+
+		// If we're removing ourself, then let the (un)nominate callback
+		// do it. That way it removes itself cleanly on server shutdown.
+		if member == obj.Hostname { // remove in unnominate!
+			cancel()
+			obj.Logf("unnominate: removing self...")
+			continue
+		}
+
+		// cancel remove sleep and unblock early on event...
+		obj.Logf("waiting %s for %s to self remove...", SelfRemoveTimeout.String(), member)
+		select {
+		case <-timeout:
+			// pass
+		case err, ok := <-ch:
+			if ok {
+				select {
+				case <-timeout:
+					// wait until timeout finishes
+				}
+				reterr = errwrap.Append(reterr, err)
+			}
+			// removed quickly!
+		}
+		cancel()
+
+		// In case the removed member doesn't remove itself, do it!
+		resp, err := obj.memberRemove(leaderCtx, memberID)
+		if err != nil {
+			return errwrap.Wrapf(err, "member remove error")
+		}
+		if resp != nil {
+			obj.Logf("member removed (forced): %s (%d)", member, memberID)
+			if err := obj.updateMemberState(resp.Members); err != nil {
+				return err
+			}
+			// Do this I guess, but the TTL will eventually get it.
+			// Remove the other member to avoid client connections.
+			if err := obj.advertise(leaderCtx, member, nil); err != nil {
+				return err
+			}
+		}
+
+		// Remove the member from our lists to avoid blocking future
+		// possible MemberList calls which would try and connect to a
+		// missing member... The lists should get updated from the
+		// member exiting safely if it doesn't crash, but if it did
+		// and/or since it's a race to see if the update event will get
+		// seen before we need the new data, just do it now anyways.
+		// TODO: Is the above comment still true?
+		obj.rmMemberState(member) // proactively delete it
+
+		obj.Logf("member %s (%d) removed successfully!", member, memberID)
+	}
+
+	// NOTE: We could ensure that etcd reconnects here, but we can just wait
+	// for the endpoints callback which should see the state change instead.
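+	// Even so, syncing the endpoints proactively is harmless; at worst it
+	// is a no-op if the list did not actually change.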
+ + obj.setEndpoints() // sync client with new endpoints + return reterr +} diff --git a/etcd/chooser/chooser.go b/etcd/chooser/chooser.go new file mode 100644 index 00000000..09a22962 --- /dev/null +++ b/etcd/chooser/chooser.go @@ -0,0 +1,98 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package chooser + +import ( + "context" + + "github.com/purpleidea/mgmt/etcd/interfaces" + + etcdtypes "github.com/coreos/etcd/pkg/types" +) + +// Data represents the input data that is passed to the chooser. +type Data struct { + // Hostname is the hostname running this chooser instance. It can be + // used as a unique key in the cluster. + Hostname string // ourself + + Debug bool + Logf func(format string, v ...interface{}) +} + +// Chooser represents the interface you must implement if you want to be able to +// control which cluster members are added and removed. Remember that this can +// get run from any peer (server) machine in the cluster, and that this may +// change as different leaders are elected! Do not assume any state will remain +// between invocations. If you want to maintain hysteresis or state, make sure +// to synchronize it in etcd. +type Chooser interface { + // Validate validates the chooser implementation to ensure the params + // represent a valid instantiation. + Validate() error + + // Init initializes the chooser and passes in some useful data and + // handles. + Init(*Data) error + + // Connect will be called with a client interfaces.Client that you can + // use if necessary to store some shared state between instances of this + // and watch for external changes. Sharing state between members should + // be avoided if possible, and there is no guarantee that your data + // won't be deleted in a disaster. There are no backups for this, + // regenerate anything you might need. Additionally, this may only be + // used inside the Chooser method, since Connect is only called after + // Init. This is however very useful for implementing special choosers. + // Since some operations can run on connect, it gets a context. If you + // cancel this context, then you might expect that Watch could die too. + // Both of these should get cancelled if you call Disconnect. + Connect(context.Context, interfaces.Client) error // we get given a namespaced client + + // Disconnect tells us to cancel our use of the client interface that we + // got from the Connect method. We must not return until we're done. + Disconnect() error + + // Watch is called by the engine to allow us to Watch for changes that + // might cause us to want to re-evaluate our nomination decision. It + // should error if it cannot startup. Once it is running, it should send + // a nil error on every event, and an error if things go wrong. When + // Disconnect is shutdown, then that should cause this to exit. 
When + // this sends events, Choose will usually eventually get called in + // response. + Watch() (chan error, error) + + // Choose takes the current peer membership state, and the available + // volunteers, and produces a list of who we should add and who should + // quit. In general, it's best to only remove one member at a time, in + // particular because this will get called iteratively on future events, + // and it can remove subsequent members on the next iteration. One + // important note: when building a new cluster, we do assume that out of + // one available volunteer, and no members, that this first volunteer is + // selected. Make sure that any implementations of this function do this + // as well, since otherwise the hardcoded initial assumption would be + // proven wrong here! + // TODO: we could pass in two lists of hostnames instead of the full + // URLsMap here, but let's keep it more complicated now in case, and + // reduce it down later if needed... + // TODO: should we add a step arg here ? + Choose(membership, volunteers etcdtypes.URLsMap) (nominees, quitters []string, err error) + + // Close runs some cleanup routines in case there is anything that you'd + // like to free after we're done. + Close() error +} diff --git a/etcd/chooser/dynamicsize.go b/etcd/chooser/dynamicsize.go new file mode 100644 index 00000000..3f60f520 --- /dev/null +++ b/etcd/chooser/dynamicsize.go @@ -0,0 +1,285 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package chooser + +import ( + "context" + "fmt" + "strconv" + "sync" + + "github.com/purpleidea/mgmt/etcd/interfaces" + + etcd "github.com/coreos/etcd/clientv3" + etcdtypes "github.com/coreos/etcd/pkg/types" +) + +// XXX: Test causing cluster shutdowns with: +// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 0 +// It is currently broken. + +const ( + // DefaultIdealDynamicSize is the default target ideal dynamic cluster + // size used for the initial cluster. + DefaultIdealDynamicSize = 5 + + // IdealDynamicSizePath is the path key used for the chooser. It usually + // gets used with a namespace prefix. + IdealDynamicSizePath = "/dynamicsize/idealclustersize" +) + +// DynamicSize is a simple implementation of the Chooser interface. This helps +// select which machines to add and remove as we elastically grow and shrink our +// cluster. +// TODO: think of a better name +type DynamicSize struct { + // IdealClusterSize is the ideal target size for this cluster. If it is + // set to zero, then it will use DefaultIdealDynamicSize as the value. + IdealClusterSize uint16 + + data *Data // save for later + client interfaces.Client + + ctx context.Context + cancel func() + wg *sync.WaitGroup +} + +// Validate validates the struct. 
+func (obj *DynamicSize) Validate() error {
+	// NOTE: IdealClusterSize is a uint16, so it can never be negative. A
+	// zero value is replaced with DefaultIdealDynamicSize during Init.
+	// TODO: if changed to zero, treat as a cluster shutdown signal
+	return nil
+}
+
+// Init accepts some useful data and handles.
+func (obj *DynamicSize) Init(data *Data) error {
+	if data.Hostname == "" {
+		return fmt.Errorf("can't Init with empty Hostname value")
+	}
+	if data.Logf == nil {
+		return fmt.Errorf("no Logf function was specified")
+	}
+
+	if obj.IdealClusterSize == 0 {
+		obj.IdealClusterSize = DefaultIdealDynamicSize
+	}
+
+	obj.data = data
+	obj.wg = &sync.WaitGroup{}
+	return nil
+}
+
+// Close runs some cleanup routines.
+func (obj *DynamicSize) Close() error {
+	return nil
+}
+
+// Connect is called to accept an etcd.KV namespace that we can use.
+func (obj *DynamicSize) Connect(ctx context.Context, client interfaces.Client) error {
+	obj.client = client
+	obj.ctx, obj.cancel = context.WithCancel(ctx)
+	size, err := DynamicSizeGet(obj.ctx, obj.client)
+	if err == interfaces.ErrNotExist || (err == nil && size == 0) {
+		// unset, so set it in the running cluster
+		changed, err := DynamicSizeSet(obj.ctx, obj.client, obj.IdealClusterSize)
+		if err == nil && changed {
+			obj.data.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
+		}
+		return err
+	} else if err == nil && size >= 1 {
+		// already set, get from running cluster (use the valid cluster value)
+		if obj.IdealClusterSize != size {
+			obj.data.Logf("using dynamic cluster size of: %d", size)
+		}
+		obj.IdealClusterSize = size // get from existing cluster...
+	}
+
+	return err
+}
+
+// Disconnect is called to cancel our use of the etcd.KV connection.
+func (obj *DynamicSize) Disconnect() error {
+	if obj.client != nil { // if connect was not called, don't call this...
+		obj.cancel()
+	}
+	obj.wg.Wait()
+	return nil
+}
+
+// Watch is called to send events anytime we might want to change membership. It
+// is also used to watch for changes so that when we get an event, we know to
+// honour the change in Choose.
+func (obj *DynamicSize) Watch() (chan error, error) {
+	// NOTE: The body of this function is very similar to the logic in the
+	// simple client.Watcher implementation that wraps ComplexWatcher.
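+	// The loop below folds each batch of watch events into a single new
+	// size value, and it only sends an event out when that value actually
+	// changed from the previous one.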
+ path := IdealDynamicSizePath + cancelCtx, cancel := context.WithCancel(obj.ctx) + info, err := obj.client.ComplexWatcher(cancelCtx, path) + if err != nil { + defer cancel() + return nil, err + } + ch := make(chan error) + obj.wg.Add(1) // hook in to global wait group + go func() { + defer obj.wg.Done() + defer close(ch) + defer cancel() + var data *interfaces.WatcherData + var ok bool + for { + select { + case data, ok = <-info.Events: // read + if !ok { + return + } + case <-cancelCtx.Done(): + continue // wait for ch closure, but don't block + } + + size := obj.IdealClusterSize + for _, event := range data.Events { // apply each event + if event.Type != etcd.EventTypePut { + continue + } + key := string(event.Kv.Key) + key = key[len(data.Path):] // remove path prefix + val := string(event.Kv.Value) + if val == "" { + continue // ignore empty values + } + i, err := strconv.Atoi(val) + if err != nil { + continue // ignore bad values + } + size = uint16(i) // save + } + if size == obj.IdealClusterSize { + continue // no change + } + // set before sending the signal + obj.IdealClusterSize = size + + if size == 0 { // zero means shutdown + obj.data.Logf("impending cluster shutdown...") + } else { + obj.data.Logf("got new dynamic cluster size of: %d", size) + } + + select { + case ch <- data.Err: // send (might be nil!) + case <-cancelCtx.Done(): + continue // wait for ch closure, but don't block + } + } + }() + return ch, nil +} + +// Choose accepts a list of current membership, and a list of volunteers. From +// that we can decide who we should add and remove. We return a list of those +// nominated, and unnominated users respectively. +func (obj *DynamicSize) Choose(membership, volunteers etcdtypes.URLsMap) ([]string, []string, error) { + // Possible nominees include anyone that has volunteered, but that + // isn't a member. + if obj.data.Debug { + obj.data.Logf("goal: %d members", obj.IdealClusterSize) + } + nominees := []string{} + for hostname := range volunteers { + if _, exists := membership[hostname]; !exists { + nominees = append(nominees, hostname) + } + } + + // Possible quitters include anyone that is a member, but that is not a + // volunteer. (They must have unvolunteered.) + quitters := []string{} + for hostname := range membership { + if _, exists := volunteers[hostname]; !exists { + quitters = append(quitters, hostname) + } + } + + // What we want to know... + nominated := []string{} + unnominated := []string{} + + // We should always only add ONE member at a time! + // TODO: is it okay to remove multiple members at the same time? + if len(nominees) > 0 && len(membership)-len(quitters) < int(obj.IdealClusterSize) { + //unnominated = []string{} // only do one operation at a time + nominated = []string{nominees[0]} // FIXME: use a better picker algorithm + + } else if len(quitters) == 0 && len(membership) > int(obj.IdealClusterSize) { // too many members + //nominated = []string{} // only do one operation at a time + for kicked := range membership { + // don't kick ourself unless we are the only one left... + if kicked != obj.data.Hostname || (obj.IdealClusterSize == 0 && len(membership) == 1) { + unnominated = []string{kicked} // FIXME: use a better picker algorithm + break + } + } + } else if len(quitters) > 0 { // must do these before new unvolunteers + unnominated = quitters // get rid of the quitters + } + + return nominated, unnominated, nil // perform these changes +} + +// DynamicSizeGet gets the currently set dynamic size set in the cluster. 
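+// It returns interfaces.ErrNotExist if no value has been stored yet.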
+func DynamicSizeGet(ctx context.Context, client interfaces.Client) (uint16, error) { + key := IdealDynamicSizePath + m, err := client.Get(ctx, key) // (map[string]string, error) + if err != nil { + return 0, err + } + val, exists := m[IdealDynamicSizePath] + if !exists { + return 0, interfaces.ErrNotExist + } + i, err := strconv.Atoi(val) + if err != nil { + return 0, fmt.Errorf("bad value") + } + return uint16(i), nil +} + +// DynamicSizeSet sets the dynamic size in the cluster. It returns true if it +// changed or set the value. +func DynamicSizeSet(ctx context.Context, client interfaces.Client, size uint16) (bool, error) { + key := IdealDynamicSizePath + val := strconv.FormatUint(uint64(size), 10) // fmt.Sprintf("%d", size) + + ifCmps := []etcd.Cmp{ + etcd.Compare(etcd.Value(key), "=", val), // desired state + } + elseOps := []etcd.Op{etcd.OpPut(key, val)} + + resp, err := client.Txn(ctx, ifCmps, nil, elseOps) + if err != nil { + return false, err + } + // succeeded is set to true if the compare evaluated to true + changed := !resp.Succeeded + + return changed, err +} diff --git a/etcd/client.go b/etcd/client.go deleted file mode 100644 index e1d34839..00000000 --- a/etcd/client.go +++ /dev/null @@ -1,95 +0,0 @@ -// Mgmt -// Copyright (C) 2013-2019+ James Shubin and the project contributors -// Written by James Shubin and the project contributors -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -package etcd - -import ( - "time" - - "github.com/purpleidea/mgmt/util/errwrap" - - etcd "github.com/coreos/etcd/clientv3" // "clientv3" - context "golang.org/x/net/context" -) - -// ClientEtcd provides a simple etcd client for deploy and status operations. -type ClientEtcd struct { - Seeds []string // list of endpoints to try to connect - - client *etcd.Client -} - -// GetClient returns a handle to the raw etcd client object. -func (obj *ClientEtcd) GetClient() *etcd.Client { - return obj.client -} - -// GetConfig returns the config struct to be used for the etcd client connect. -func (obj *ClientEtcd) GetConfig() etcd.Config { - cfg := etcd.Config{ - Endpoints: obj.Seeds, - // RetryDialer chooses the next endpoint to use - // it comes with a default dialer if unspecified - DialTimeout: 5 * time.Second, - } - return cfg -} - -// Connect connects the client to a server, and then builds the *API structs. -// If reconnect is true, it will force a reconnect with new config endpoints. -func (obj *ClientEtcd) Connect() error { - if obj.client != nil { // memoize - return nil - } - - var err error - cfg := obj.GetConfig() - obj.client, err = etcd.New(cfg) // connect! - if err != nil { - return errwrap.Wrapf(err, "client connect error") - } - return nil -} - -// Destroy cleans up the entire etcd client connection. -func (obj *ClientEtcd) Destroy() error { - err := obj.client.Close() - //obj.wg.Wait() - return err -} - -// Get runs a get on the client connection. This has the same signature as our -// EmbdEtcd Get function. 
-func (obj *ClientEtcd) Get(path string, opts ...etcd.OpOption) (map[string]string, error) { - resp, err := obj.client.Get(context.TODO(), path, opts...) - if err != nil || resp == nil { - return nil, err - } - - // TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse - result := make(map[string]string) - for _, x := range resp.Kvs { - result[string(x.Key)] = string(x.Value) - } - return result, nil -} - -// Txn runs a transaction on the client connection. This has the same signature -// as our EmbdEtcd Txn function. -func (obj *ClientEtcd) Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) { - return obj.client.KV.Txn(context.TODO()).If(ifcmps...).Then(thenops...).Else(elseops...).Commit() -} diff --git a/etcd/resources.go b/etcd/client/resources/resources.go similarity index 67% rename from etcd/resources.go rename to etcd/client/resources/resources.go index c5c52883..b310dca8 100644 --- a/etcd/resources.go +++ b/etcd/client/resources/resources.go @@ -15,60 +15,43 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -package etcd +package resources import ( + "context" "fmt" - "log" "strings" "github.com/purpleidea/mgmt/engine" engineUtil "github.com/purpleidea/mgmt/engine/util" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util" etcd "github.com/coreos/etcd/clientv3" ) +const ( + ns = "" // in case we want to add one back in +) + // WatchResources returns a channel that outputs events when exported resources // change. // TODO: Filter our watch (on the server side if possible) based on the // collection prefixes and filters that we care about... -func WatchResources(obj *EmbdEtcd) chan error { - ch := make(chan error, 1) // buffer it so we can measure it - path := fmt.Sprintf("%s/exported/", NS) - callback := func(re *RE) error { - // TODO: is this even needed? it used to happen on conn errors - log.Printf("Etcd: Watch: Path: %v", path) // event - if re == nil || re.response.Canceled { - return fmt.Errorf("watch is empty") // will cause a CtxError+retry - } - // we normally need to check if anything changed since the last - // event, since a set (export) with no changes still causes the - // watcher to trigger and this would cause an infinite loop. we - // don't need to do this check anymore because we do the export - // transactionally, and only if a change is needed. since it is - // atomic, all the changes arrive together which avoids dupes!! - if len(ch) == 0 { // send event only if one isn't pending - // this check avoids multiple events all queueing up and then - // being released continuously long after the changes stopped - // do not block! - ch <- nil // event - } - return nil - } - _, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors - return ch +func WatchResources(ctx context.Context, client interfaces.Client) (chan error, error) { + path := fmt.Sprintf("%s/exported/", ns) + return client.Watcher(ctx, path, etcd.WithPrefix()) } // SetResources exports all of the resources which we pass in to etcd. 
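+// The whole export runs as a single atomic transaction, so watchers see at
+// most one event per call, and only when something actually changed.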
-func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) error { +func SetResources(ctx context.Context, client interfaces.Client, hostname string, resourceList []engine.Res) error { // key structure is $NS/exported/$hostname/resources/$uid = $data var kindFilter []string // empty to get from everyone hostnameFilter := []string{hostname} // this is not a race because we should only be reading keys which we // set, and there should not be any contention with other hosts here! - originals, err := GetResources(obj, hostnameFilter, kindFilter) + originals, err := GetResources(ctx, client, hostnameFilter, kindFilter) if err != nil { return err } @@ -81,10 +64,10 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err ops := []etcd.Op{} // list of ops in this transaction for _, res := range resourceList { if res.Kind() == "" { - log.Fatalf("Etcd: SetResources: Error: Empty kind: %v", res.Name()) + return fmt.Errorf("empty kind: %s", res.Name()) } uid := fmt.Sprintf("%s/%s", res.Kind(), res.Name()) - path := fmt.Sprintf("%s/exported/%s/resources/%s", NS, hostname, uid) + path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid) if data, err := engineUtil.ResToB64(res); err == nil { ifs = append(ifs, etcd.Compare(etcd.Value(path), "=", data)) // desired state ops = append(ops, etcd.OpPut(path, data)) @@ -106,10 +89,10 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err // delete old, now unused resources here... for _, res := range originals { if res.Kind() == "" { - log.Fatalf("Etcd: SetResources: Error: Empty kind: %v", res.Name()) + return fmt.Errorf("empty kind: %s", res.Name()) } uid := fmt.Sprintf("%s/%s", res.Kind(), res.Name()) - path := fmt.Sprintf("%s/exported/%s/resources/%s", NS, hostname, uid) + path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid) if match(res, resourceList) { // if we match, no need to delete! continue @@ -124,9 +107,9 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err // it's important to do this in one transaction, and atomically, because // this way, we only generate one watch event, and only when it's needed if hasDeletes { // always run, ifs don't matter - _, err = obj.Txn(nil, ops, nil) // TODO: does this run? it should! + _, err = client.Txn(ctx, nil, ops, nil) // TODO: does this run? it should! } else { - _, err = obj.Txn(ifs, nil, ops) // TODO: do we need to look at response? + _, err = client.Txn(ctx, ifs, nil, ops) // TODO: do we need to look at response? } return err } @@ -136,11 +119,11 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err // TODO: Expand this with a more powerful filter based on what we eventually // support in our collect DSL. Ideally a server side filter like WithFilter() // We could do this if the pattern was $NS/exported/$kind/$hostname/$uid = $data. 
-func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine.Res, error) { +func GetResources(ctx context.Context, client interfaces.Client, hostnameFilter, kindFilter []string) ([]engine.Res, error) { // key structure is $NS/exported/$hostname/resources/$uid = $data - path := fmt.Sprintf("%s/exported/", NS) + path := fmt.Sprintf("%s/exported/", ns) resourceList := []engine.Res{} - keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) + keyMap, err := client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) if err != nil { return nil, fmt.Errorf("could not get resources: %v", err) } @@ -160,7 +143,9 @@ func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine. if kind == "" { return nil, fmt.Errorf("unexpected kind chunk") } - + if name == "" { // TODO: should I check this? + return nil, fmt.Errorf("unexpected empty name") + } // FIXME: ideally this would be a server side filter instead! if len(hostnameFilter) > 0 && !util.StrInList(hostname, hostnameFilter) { continue @@ -171,9 +156,9 @@ func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine. continue } - if obj, err := engineUtil.B64ToRes(val); err == nil { - log.Printf("Etcd: Get: (Hostname, Kind, Name): (%s, %s, %s)", hostname, kind, name) - resourceList = append(resourceList, obj) + if res, err := engineUtil.B64ToRes(val); err == nil { + //obj.Logf("Get: (Hostname, Kind, Name): (%s, %s, %s)", hostname, kind, name) + resourceList = append(resourceList, res) } else { return nil, fmt.Errorf("can't convert from B64: %v", err) } diff --git a/etcd/client/simple.go b/etcd/client/simple.go new file mode 100644 index 00000000..f025b99b --- /dev/null +++ b/etcd/client/simple.go @@ -0,0 +1,484 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package client + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/purpleidea/mgmt/etcd/interfaces" + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" // "clientv3" + "github.com/coreos/etcd/clientv3/namespace" +) + +// method represents the method we used to build the simple client. +type method uint8 + +const ( + methodError method = iota + methodSeeds + methodClient + methodNamespace +) + +// NewClientFromSeeds builds a new simple client by connecting to a list of +// seeds. +func NewClientFromSeeds(seeds []string) *Simple { + return &Simple{ + method: methodSeeds, + wg: &sync.WaitGroup{}, + + seeds: seeds, + } +} + +// NewClientFromSeedsNamespace builds a new simple client by connecting to a +// list of seeds and ensuring all key access is prefixed with a namespace. 
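+//
+// For example (a sketch only; the endpoint and namespace are sample values):
+//
+//	c := NewClientFromSeedsNamespace(
+//		[]string{"http://127.0.0.1:2379"}, // seeds
+//		"/_mgmt",                          // ns
+//	)
+//	if err := c.Init(); err != nil {
+//		// could not connect
+//	}
+//	defer c.Close()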
+func NewClientFromSeedsNamespace(seeds []string, ns string) *Simple {
+	return &Simple{
+		method: methodSeeds,
+		wg:     &sync.WaitGroup{},
+
+		seeds:     seeds,
+		namespace: ns,
+	}
+}
+
+// NewClientFromClient builds a new simple client by taking an existing client
+// struct. It does not disconnect this when Close is called, as that is up to
+// the parent, which is the owner of that client input struct.
+func NewClientFromClient(client *etcd.Client) *Simple {
+	return &Simple{
+		method: methodClient,
+		wg:     &sync.WaitGroup{},
+
+		client: client,
+	}
+}
+
+// NewClientFromNamespaceStr builds a new simple client by taking an existing
+// client and a string namespace. Warning, this doesn't properly nest the
+// namespaces.
+func NewClientFromNamespaceStr(client *etcd.Client, ns string) *Simple {
+	if client == nil {
+		return &Simple{
+			method: methodError,
+			err:    fmt.Errorf("client is nil"),
+		}
+	}
+	kv := client.KV
+	w := client.Watcher
+	if ns != "" { // only layer if not empty
+		kv = namespace.NewKV(client.KV, ns)
+		w = namespace.NewWatcher(client.Watcher, ns)
+	}
+
+	return &Simple{
+		method: methodClient, // similar enough to this one to share it!
+		wg:     &sync.WaitGroup{},
+
+		client: client, // store for GetClient()
+		kv:     kv,
+		w:      w,
+	}
+}
+
+// NewClientFromSimple builds a simple client from an existing client interface
+// which must be a simple client. This awkward method is required so that
+// namespace nesting works properly, because the *etcd.Client doesn't directly
+// pass through the namespace. I'd love to nuke this function, but it's good
+// enough for now.
+func NewClientFromSimple(client interfaces.Client, ns string) *Simple {
+	if client == nil {
+		return &Simple{
+			method: methodError,
+			err:    fmt.Errorf("client is nil"),
+		}
+	}
+
+	simple, ok := client.(*Simple)
+	if !ok {
+		return &Simple{
+			method: methodError,
+			err:    fmt.Errorf("client is not simple"),
+		}
+	}
+	kv := simple.kv
+	w := simple.w
+	if ns != "" { // only layer if not empty
+		kv = namespace.NewKV(simple.kv, ns)
+		w = namespace.NewWatcher(simple.w, ns)
+	}
+
+	return &Simple{
+		method: methodNamespace,
+		wg:     &sync.WaitGroup{},
+
+		client: client.GetClient(), // store for GetClient()
+		kv:     kv,
+		w:      w,
+	}
+}
+
+// NewClientFromNamespace builds a new simple client by taking an existing set
+// of interface APIs that we might use.
+func NewClientFromNamespace(client *etcd.Client, kv etcd.KV, w etcd.Watcher) *Simple {
+	return &Simple{
+		method: methodNamespace,
+		wg:     &sync.WaitGroup{},
+
+		client: client, // store for GetClient()
+		kv:     kv,
+		w:      w,
+	}
+}
+
+// Simple provides a simple etcd client for deploy and status operations. You
+// can set Debug and Logf after you've built this with one of the NewClient*
+// methods.
+type Simple struct {
+	Debug bool
+	Logf  func(format string, v ...interface{})
+
+	method method
+	wg     *sync.WaitGroup
+
+	// err is the error we set when using methodError
+	err error
+
+	// seeds is the list of endpoints to try to connect to.
+	seeds     []string
+	namespace string
+
+	// client is the etcd client connection.
+	client *etcd.Client
+
+	// kv and w are the namespaced interfaces that we got passed.
+	kv etcd.KV
+	w  etcd.Watcher
+}
+
+// logf is a safe wrapper around the Logf parameter that doesn't panic if the
+// user didn't pass a logger in.
+func (obj *Simple) logf(format string, v ...interface{}) {
+	if obj.Logf == nil {
+		return
+	}
+	obj.Logf(format, v...)
+}
+
+// config returns the config struct to be used for the etcd client connect.
+func (obj *Simple) config() etcd.Config {
+	cfg := etcd.Config{
+		Endpoints: obj.seeds,
+		// RetryDialer chooses the next endpoint to use
+		// it comes with a default dialer if unspecified
+		DialTimeout: 5 * time.Second,
+	}
+	return cfg
+}
+
+// connect connects the client to a server, and then builds the *API structs.
+func (obj *Simple) connect() error {
+	if obj.client != nil { // memoize
+		return nil
+	}
+
+	var err error
+	cfg := obj.config()
+	obj.client, err = etcd.New(cfg) // connect!
+	if err != nil {
+		return errwrap.Wrapf(err, "client connect error")
+	}
+	obj.kv = obj.client.KV
+	obj.w = obj.client.Watcher
+	if obj.namespace != "" { // bonus feature of seeds method
+		obj.kv = namespace.NewKV(obj.client.KV, obj.namespace)
+		obj.w = namespace.NewWatcher(obj.client.Watcher, obj.namespace)
+	}
+	return nil
+}
+
+// Init starts up the struct.
+func (obj *Simple) Init() error {
+	// By the end of this, we must have obj.kv and obj.w available for use.
+	switch obj.method {
+	case methodError:
+		return obj.err // use the error we set
+
+	case methodSeeds:
+		if len(obj.seeds) <= 0 {
+			return fmt.Errorf("zero seeds")
+		}
+		return obj.connect()
+
+	case methodClient:
+		if obj.client == nil {
+			return fmt.Errorf("no client")
+		}
+		if obj.kv == nil { // overwrite if not specified!
+			obj.kv = obj.client.KV
+		}
+		if obj.w == nil {
+			obj.w = obj.client.Watcher
+		}
+		return nil
+
+	case methodNamespace:
+		if obj.kv == nil || obj.w == nil {
+			return fmt.Errorf("empty namespace")
+		}
+		return nil
+	}
+
+	return fmt.Errorf("unknown method: %+v", obj.method)
+}
+
+// Close cleans up the struct after we're finished.
+func (obj *Simple) Close() error {
+	defer obj.wg.Wait()
+	switch obj.method {
+	case methodError: // for consistency
+		return fmt.Errorf("did not Init")
+
+	case methodSeeds:
+		return obj.client.Close()
+
+	case methodClient:
+		// we were given a client, so we don't own it or close it
+		return nil
+
+	case methodNamespace:
+		return nil
+	}
+
+	return fmt.Errorf("unknown method: %+v", obj.method)
+}
+
+// GetClient returns a handle to an open etcd Client. This is needed for certain
+// upstream APIs that don't support passing in KV and Watcher instead.
+func (obj *Simple) GetClient() *etcd.Client {
+	return obj.client
+}
+
+// Set runs a set operation. If you'd like more information about whether a
+// value changed or not, use Txn instead.
+func (obj *Simple) Set(ctx context.Context, key, value string, opts ...etcd.OpOption) error {
+	// key is the full key path
+	resp, err := obj.kv.Put(ctx, key, value, opts...)
+	if obj.Debug {
+		obj.logf("set(%s): %v", key, resp) // bonus
+	}
+	return err
+}
+
+// Get runs a get operation.
+func (obj *Simple) Get(ctx context.Context, path string, opts ...etcd.OpOption) (map[string]string, error) {
+	resp, err := obj.kv.Get(ctx, path, opts...)
+	if err != nil {
+		return nil, err
+	}
+	if resp == nil {
+		return nil, fmt.Errorf("empty response")
+	}
+
+	// TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
+	result := make(map[string]string)
+	for _, x := range resp.Kvs {
+		result[string(x.Key)] = string(x.Value)
+	}
+	return result, nil
+}
+
+// Del runs a delete operation.
+func (obj *Simple) Del(ctx context.Context, path string, opts ...etcd.OpOption) (int64, error) {
+	resp, err := obj.kv.Delete(ctx, path, opts...)
+	if err == nil {
+		return resp.Deleted, nil
+	}
+	return -1, err
+}
+
+// Txn runs a transaction.
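+//
+// A compare-and-swap sketch (hypothetical key and values; assumes a context
+// ctx and an initialized client obj):
+//
+//	ifs := []etcd.Cmp{etcd.Compare(etcd.Value("/k"), "=", "v1")}
+//	ops := []etcd.Op{etcd.OpPut("/k", "v2")}
+//	resp, err := obj.Txn(ctx, ifs, ops, nil) // no else ops
+//	// on success, resp.Succeeded reports whether the ifs all matched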
+func (obj *Simple) Txn(ctx context.Context, ifCmps []etcd.Cmp, thenOps, elseOps []etcd.Op) (*etcd.TxnResponse, error) {
+	resp, err := obj.kv.Txn(ctx).If(ifCmps...).Then(thenOps...).Else(elseOps...).Commit()
+	if obj.Debug {
+		obj.logf("txn: %v", resp) // bonus
+	}
+	return resp, err
+}
+
+// Watcher is a watcher that returns a chan of errors instead of a chan with
+// all sorts of watcher data. This is useful when we only want an event signal,
+// but we don't care about the specifics.
+func (obj *Simple) Watcher(ctx context.Context, path string, opts ...etcd.OpOption) (chan error, error) {
+	cancelCtx, cancel := context.WithCancel(ctx)
+	info, err := obj.ComplexWatcher(cancelCtx, path, opts...)
+	if err != nil {
+		defer cancel()
+		return nil, err
+	}
+	ch := make(chan error)
+	obj.wg.Add(1) // hook in to global wait group
+	go func() {
+		defer obj.wg.Done()
+		defer close(ch)
+		defer cancel()
+		var data *interfaces.WatcherData
+		var ok bool
+		for {
+			select {
+			case data, ok = <-info.Events: // read
+				if !ok {
+					return
+				}
+			case <-cancelCtx.Done():
+				continue // wait for ch closure, but don't block
+			}
+
+			select {
+			case ch <- data.Err: // send (might be nil!)
+			case <-cancelCtx.Done():
+				continue // wait for ch closure, but don't block
+			}
+		}
+	}()
+	return ch, nil
+}
+
+// ComplexWatcher is a more capable watcher that also returns data information.
+// This starts a watch request. It writes on a channel that you can follow to
+// know when an event or an error occurs. It always sends one startup event. It
+// will not return until the watch has been started. If it cannot start, then it
+// will return an error. Remember to add the WithPrefix() option if you want to
+// watch recursively.
+// TODO: do we need to support retry and changed client connections?
+// XXX: do we need to track last successful revision and retry from there?
+// XXX: if so, use:
+// lastRev := response.Header.Revision // TODO: +1 ?
+// etcd.WithRev(rev)
+func (obj *Simple) ComplexWatcher(ctx context.Context, path string, opts ...etcd.OpOption) (*interfaces.WatcherInfo, error) {
+	if obj.client == nil { // catch bugs, this often means programming error
+		return nil, fmt.Errorf("client is nil") // extra safety!
+	}
+	cancelCtx, cancel := context.WithCancel(ctx)
+	eventsChan := make(chan *interfaces.WatcherData) // channel of watcher events (they can carry errors)
+
+	var count uint8
+	wg := &sync.WaitGroup{}
+
+	// TODO: if we can detect the use of WithCreatedNotify, we don't need to
+	// hard-code it down below... https://github.com/coreos/etcd/issues/9689
+	// XXX: proof of concept patch: https://github.com/coreos/etcd/pull/9705
+	//for _, op := range opts {
+	//	//if op.Cmp(etcd.WithCreatedNotify()) == nil { // would be best
+	//	if etcd.OpOptionCmp(op, etcd.WithCreatedNotify()) == nil {
+	//		count++
+	//		wg.Add(1)
+	//		break
+	//	}
+	//}
+	count++
+	wg.Add(1)
+
+	wOpts := []etcd.OpOption{
+		etcd.WithCreatedNotify(),
+	}
+	wOpts = append(wOpts, opts...)
+	var err error
+
+	obj.wg.Add(1) // hook in to global wait group
+	go func() {
+		defer obj.wg.Done()
+		defer close(eventsChan)
+		defer cancel() // it's safe to cancel() more than once!
+		ch := obj.w.Watch(cancelCtx, path, wOpts...)
+		for {
+			var resp etcd.WatchResponse
+			var ok bool
+			var created bool
+			select {
+			case resp, ok = <-ch:
+				if !ok {
+					if count > 0 { // closed before startup
+						// set err in parent scope!
+						err = fmt.Errorf("watch closed")
+						count--
+						wg.Done()
+					}
+					return
+				}
+
+				// the watch is now running!
+ if count > 0 && resp.Created { + created = true + count-- + wg.Done() + } + + isCanceled := resp.Canceled || resp.Err() == context.Canceled + // TODO: this might not be needed + if resp.Header.Revision == 0 { // by inspection + if obj.Debug { + obj.logf("watch: received empty message") // switched client connection + } + isCanceled = true + } + + if isCanceled { + data := &interfaces.WatcherData{ + Err: context.Canceled, + } + select { // send the error + case eventsChan <- data: + case <-ctx.Done(): + return + } + continue // channel should close shortly + } + } + + // TODO: consider processing the response data into a + // more useful form for the callback... + data := &interfaces.WatcherData{ + Created: created, + Path: path, + Header: resp.Header, + Events: resp.Events, + Err: resp.Err(), + } + + select { // send the event + case eventsChan <- data: + case <-ctx.Done(): + return + } + } + }() + + wg.Wait() // wait for created event before we return + + return &interfaces.WatcherInfo{ + Cancel: cancel, + Events: eventsChan, + }, err +} diff --git a/etcd/str.go b/etcd/client/str/str.go similarity index 66% rename from etcd/str.go rename to etcd/client/str/str.go index 0b5fb740..26408ece 100644 --- a/etcd/str.go +++ b/etcd/client/str/str.go @@ -15,20 +15,22 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -package etcd +package str import ( - "errors" + "context" "fmt" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" ) -// ErrNotExist is returned when GetStr can not find the requested key. -// TODO: https://dave.cheney.net/2016/04/07/constant-errors -var ErrNotExist = errors.New("errNotExist") +const ( + ns = "" // in case we want to add one back in +) // WatchStr returns a channel which spits out events on key activity. // FIXME: It should close the channel when it's done, and spit out errors when @@ -37,37 +39,23 @@ var ErrNotExist = errors.New("errNotExist") // done, does that mean we leak go-routines since it might still be running, but // perhaps even blocked??? Could this cause a dead-lock? Should we instead return // some sort of struct which has a close method with it to ask for a shutdown? -func WatchStr(obj *EmbdEtcd, key string) chan error { +func WatchStr(ctx context.Context, client interfaces.Client, key string) (chan error, error) { // new key structure is $NS/strings/$key = $data - path := fmt.Sprintf("%s/strings/%s", NS, key) - ch := make(chan error, 1) - // FIXME: fix our API so that we get a close event on shutdown. - callback := func(re *RE) error { - // TODO: is this even needed? it used to happen on conn errors - //log.Printf("Etcd: Watch: Path: %v", path) // event - if re == nil || re.response.Canceled { - return fmt.Errorf("watch is empty") // will cause a CtxError+retry - } - if len(ch) == 0 { // send event only if one isn't pending - ch <- nil // event - } - return nil - } - _, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors - return ch + path := fmt.Sprintf("%s/strings/%s", ns, key) + return client.Watcher(ctx, path) } // GetStr collects the string which matches a global namespace in etcd. 
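+//
+// A call sketch (the key name is hypothetical; assumes a context ctx and an
+// initialized interfaces.Client named client):
+//
+//	s, err := GetStr(ctx, client, "mykey")
+//	if err == interfaces.ErrNotExist {
+//		// the key has not been set yet
+//	}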
-func GetStr(obj *EmbdEtcd, key string) (string, error) { +func GetStr(ctx context.Context, client interfaces.Client, key string) (string, error) { // new key structure is $NS/strings/$key = $data - path := fmt.Sprintf("%s/strings/%s", NS, key) - keyMap, err := obj.Get(path, etcd.WithPrefix()) + path := fmt.Sprintf("%s/strings/%s", ns, key) + keyMap, err := client.Get(ctx, path, etcd.WithPrefix()) if err != nil { return "", errwrap.Wrapf(err, "could not get strings in: %s", key) } if len(keyMap) == 0 { - return "", ErrNotExist + return "", interfaces.ErrNotExist } if count := len(keyMap); count != 1 { @@ -79,23 +67,21 @@ func GetStr(obj *EmbdEtcd, key string) (string, error) { return "", fmt.Errorf("path `%s` is missing", path) } - //log.Printf("Etcd: GetStr(%s): %s", key, val) return val, nil } // SetStr sets a key and hostname pair to a certain value. If the value is // nil, then it deletes the key. Otherwise the value should point to a string. // TODO: TTL or delete disconnect? -func SetStr(obj *EmbdEtcd, key string, data *string) error { +func SetStr(ctx context.Context, client interfaces.Client, key string, data *string) error { // key structure is $NS/strings/$key = $data - path := fmt.Sprintf("%s/strings/%s", NS, key) + path := fmt.Sprintf("%s/strings/%s", ns, key) ifs := []etcd.Cmp{} // list matching the desired state ops := []etcd.Op{} // list of ops in this transaction (then) els := []etcd.Op{} // list of ops in this transaction (else) if data == nil { // perform a delete - // TODO: use https://github.com/coreos/etcd/pull/7417 if merged - //ifs = append(ifs, etcd.KeyExists(path)) - ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0)) + ifs = append(ifs, etcdutil.KeyExists(path)) + //ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0)) ops = append(ops, etcd.OpDelete(path)) } else { data := *data // get the real value @@ -105,6 +91,6 @@ func SetStr(obj *EmbdEtcd, key string, data *string) error { // it's important to do this in one transaction, and atomically, because // this way, we only generate one watch event, and only when it's needed - _, err := obj.Txn(ifs, ops, els) // TODO: do we need to look at response? + _, err := client.Txn(ctx, ifs, ops, els) // TODO: do we need to look at response? return errwrap.Wrapf(err, "could not set strings in: %s", key) } diff --git a/etcd/strmap.go b/etcd/client/strmap/strmap.go similarity index 71% rename from etcd/strmap.go rename to etcd/client/strmap/strmap.go index 9d92ffac..91ae7e3e 100644 --- a/etcd/strmap.go +++ b/etcd/client/strmap/strmap.go @@ -15,50 +15,43 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -package etcd +package strmap import ( + "context" "fmt" "strings" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util" "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" +) + +const ( + ns = "" // in case we want to add one back in ) // WatchStrMap returns a channel which spits out events on key activity. // FIXME: It should close the channel when it's done, and spit out errors when // something goes wrong. 
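+//
+// A rough consumer sketch (hypothetical key; assumes a context ctx and an
+// initialized interfaces.Client named client):
+//
+//	ch, err := WatchStrMap(ctx, client, "mykey")
+//	if err != nil {
+//		return err
+//	}
+//	for e := range ch {
+//		if e != nil {
+//			return e // watch error
+//		}
+//		// key activity happened; re-read the values with GetStrMap
+//	}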
-func WatchStrMap(obj *EmbdEtcd, key string) chan error { +func WatchStrMap(ctx context.Context, client interfaces.Client, key string) (chan error, error) { // new key structure is $NS/strings/$key/$hostname = $data - path := fmt.Sprintf("%s/strings/%s", NS, key) - ch := make(chan error, 1) - // FIXME: fix our API so that we get a close event on shutdown. - callback := func(re *RE) error { - // TODO: is this even needed? it used to happen on conn errors - //log.Printf("Etcd: Watch: Path: %v", path) // event - if re == nil || re.response.Canceled { - return fmt.Errorf("watch is empty") // will cause a CtxError+retry - } - if len(ch) == 0 { // send event only if one isn't pending - ch <- nil // event - } - return nil - } - _, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors - return ch + path := fmt.Sprintf("%s/strings/%s", ns, key) + return client.Watcher(ctx, path, etcd.WithPrefix()) } // GetStrMap collects all of the strings which match a namespace in etcd. -func GetStrMap(obj *EmbdEtcd, hostnameFilter []string, key string) (map[string]string, error) { +func GetStrMap(ctx context.Context, client interfaces.Client, hostnameFilter []string, key string) (map[string]string, error) { // old key structure is $NS/strings/$hostname/$key = $data // new key structure is $NS/strings/$key/$hostname = $data // FIXME: if we have the $key as the last token (old key structure), we // can allow the key to contain the slash char, otherwise we need to // verify that one isn't present in the input string. - path := fmt.Sprintf("%s/strings/%s", NS, key) - keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) + path := fmt.Sprintf("%s/strings/%s", ns, key) + keyMap, err := client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) if err != nil { return nil, errwrap.Wrapf(err, "could not get strings in: %s", key) } @@ -91,16 +84,15 @@ func GetStrMap(obj *EmbdEtcd, hostnameFilter []string, key string) (map[string]s // SetStrMap sets a key and hostname pair to a certain value. If the value is // nil, then it deletes the key. Otherwise the value should point to a string. // TODO: TTL or delete disconnect? -func SetStrMap(obj *EmbdEtcd, hostname, key string, data *string) error { +func SetStrMap(ctx context.Context, client interfaces.Client, hostname, key string, data *string) error { // key structure is $NS/strings/$key/$hostname = $data - path := fmt.Sprintf("%s/strings/%s/%s", NS, key, hostname) + path := fmt.Sprintf("%s/strings/%s/%s", ns, key, hostname) ifs := []etcd.Cmp{} // list matching the desired state ops := []etcd.Op{} // list of ops in this transaction (then) els := []etcd.Op{} // list of ops in this transaction (else) if data == nil { // perform a delete - // TODO: use https://github.com/coreos/etcd/pull/7417 if merged - //ifs = append(ifs, etcd.KeyExists(path)) - ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0)) + ifs = append(ifs, etcdutil.KeyExists(path)) + //ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0)) ops = append(ops, etcd.OpDelete(path)) } else { data := *data // get the real value @@ -110,6 +102,6 @@ func SetStrMap(obj *EmbdEtcd, hostname, key string, data *string) error { // it's important to do this in one transaction, and atomically, because // this way, we only generate one watch event, and only when it's needed - _, err := obj.Txn(ifs, ops, els) // TODO: do we need to look at response? 
+ _, err := client.Txn(ctx, ifs, ops, els) // TODO: do we need to look at response? return errwrap.Wrapf(err, "could not set strings in: %s", key) } diff --git a/etcd/converger.go b/etcd/converger.go new file mode 100644 index 00000000..b4d72ae7 --- /dev/null +++ b/etcd/converger.go @@ -0,0 +1,49 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "context" + "fmt" + + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" +) + +// setHostnameConverged sets whether a specific hostname is converged. +func (obj *EmbdEtcd) setHostnameConverged(ctx context.Context, hostname string, isConverged bool) error { + if obj.Debug { + obj.Logf("setHostnameConverged(%s): %t", hostname, isConverged) + defer obj.Logf("setHostnameConverged(%s): done!", hostname) + } + + key := fmt.Sprintf(obj.NS+convergedPathFmt, hostname) + data := fmt.Sprintf("%t", isConverged) + + // XXX: bug: https://github.com/etcd-io/etcd/issues/10566 + // XXX: reverse things with els to workaround the bug :( + //ifs := []etcd.Cmp{etcd.Compare(etcd.Value(key), "!=", data)} // desired state + //ops := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(obj.leaseID))} + ifs := []etcd.Cmp{etcd.Compare(etcd.Value(key), "=", data)} // desired state + ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID)) + els := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(obj.leaseID))} + + _, err := obj.client.Txn(ctx, ifs, nil, els) + return errwrap.Wrapf(err, "set hostname converged failed") +} diff --git a/etcd/deploy.go b/etcd/deployer/deployer.go similarity index 64% rename from etcd/deploy.go rename to etcd/deployer/deployer.go index b9c3fccc..90523807 100644 --- a/etcd/deploy.go +++ b/etcd/deployer/deployer.go @@ -15,16 +15,20 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -package etcd +package deployer import ( + "context" "fmt" "strconv" "strings" + "sync" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" ) const ( @@ -33,34 +37,52 @@ const ( hashPath = "hash" ) -// WatchDeploy returns a channel which spits out events on new deploy activity. -// FIXME: It should close the channel when it's done, and spit out errors when -// something goes wrong. -func WatchDeploy(obj *EmbdEtcd) chan error { - // key structure is $NS/deploy/$id/payload = $data - path := fmt.Sprintf("%s/%s/", NS, deployPath) - ch := make(chan error, 1) - // FIXME: fix our API so that we get a close event on shutdown. - callback := func(re *RE) error { - // TODO: is this even needed? 
it used to happen on conn errors - //log.Printf("Etcd: Watch: Path: %v", path) // event - if re == nil || re.response.Canceled { - return fmt.Errorf("watch is empty") // will cause a CtxError+retry - } - if len(ch) == 0 { // send event only if one isn't pending - ch <- nil // event - } - return nil +// SimpleDeploy is a deploy struct that provides all of the needed deploy +// methods. It requires that you give it a Client interface so that it can +// perform its remote work. You must call Init before you use it, and Close when +// you are done. +type SimpleDeploy struct { + Client interfaces.Client + + Debug bool + Logf func(format string, v ...interface{}) + + ns string // TODO: if we ever need to hardcode a base path + wg *sync.WaitGroup +} + +// Init validates the deploy structure and prepares it for first use. +func (obj *SimpleDeploy) Init() error { + if obj.Client == nil { + return fmt.Errorf("the Client was not specified") } - _, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors - return ch + obj.wg = &sync.WaitGroup{} + return nil +} + +// Close cleans up after using the deploy struct and waits for any ongoing +// watches to exit before it returns. +func (obj *SimpleDeploy) Close() error { + obj.wg.Wait() + return nil +} + +// WatchDeploy returns a channel which spits out events on new deploy activity. +// It closes the channel when it's done, and spits out errors when something +// goes wrong. If it can't start up, it errors immediately. The returned channel +// is buffered, so that a quick succession of events will get discarded. +func (obj *SimpleDeploy) WatchDeploy(ctx context.Context) (chan error, error) { + // key structure is $NS/deploy/$id/payload = $data + path := fmt.Sprintf("%s/%s/", obj.ns, deployPath) + // FIXME: obj.wg.Add(1) && obj.wg.Done() + return obj.Client.Watcher(ctx, path, etcd.WithPrefix()) } // GetDeploys gets all the available deploys. -func GetDeploys(obj Client) (map[uint64]string, error) { +func (obj *SimpleDeploy) GetDeploys(ctx context.Context) (map[uint64]string, error) { // key structure is $NS/deploy/$id/payload = $data - path := fmt.Sprintf("%s/%s/", NS, deployPath) - keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) + path := fmt.Sprintf("%s/%s/", obj.ns, deployPath) + keyMap, err := obj.Client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)) if err != nil { return nil, errwrap.Wrapf(err, "could not get deploy") } @@ -86,7 +108,7 @@ func GetDeploys(obj Client) (map[uint64]string, error) { } // TODO: do some sort of filtering here? - //log.Printf("Etcd: GetDeploys(%s): Id => Data: %d => %s", key, id, val) + //obj.Logf("GetDeploys(%s): Id => Data: %d => %s", key, id, val) result[id] = val } return result, nil @@ -107,8 +129,8 @@ func calculateMax(deploys map[uint64]string) uint64 { // an id of 0, you'll get back an empty deploy without error. This is useful so // that you can pass through this function easily. // FIXME: implement this more efficiently so that it doesn't have to download *all* the old deploys from etcd! -func GetDeploy(obj Client, id uint64) (string, error) { - result, err := GetDeploys(obj) +func (obj *SimpleDeploy) GetDeploy(ctx context.Context, id uint64) (string, error) { + result, err := obj.GetDeploys(ctx) if err != nil { return "", err } @@ -130,9 +152,9 @@ func GetDeploy(obj Client, id uint64) (string, error) { // zero. You must increment the returned value by one when you add a deploy. 
If // two or more clients race for this deploy id, then the loser is not committed, // and must repeat this GetMaxDeployID process until it succeeds with a commit! -func GetMaxDeployID(obj Client) (uint64, error) { +func (obj *SimpleDeploy) GetMaxDeployID(ctx context.Context) (uint64, error) { // TODO: this was all implemented super inefficiently, fix up for perf! - deploys, err := GetDeploys(obj) // get previous deploys + deploys, err := obj.GetDeploys(ctx) // get previous deploys if err != nil { return 0, errwrap.Wrapf(err, "error getting previous deploys") } @@ -148,29 +170,28 @@ func GetMaxDeployID(obj Client) (uint64, error) { // contributors pushing conflicting deploys. This isn't git specific, and so any // arbitrary string hash can be used. // FIXME: prune old deploys from the store when they aren't needed anymore... -func AddDeploy(obj Client, id uint64, hash, pHash string, data *string) error { +func (obj *SimpleDeploy) AddDeploy(ctx context.Context, id uint64, hash, pHash string, data *string) error { // key structure is $NS/deploy/$id/payload = $data // key structure is $NS/deploy/$id/hash = $hash - path := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id, payloadPath) - tPath := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id, hashPath) + path := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id, payloadPath) + tPath := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id, hashPath) ifs := []etcd.Cmp{} // list matching the desired state ops := []etcd.Op{} // list of ops in this transaction (then) - // TODO: use https://github.com/coreos/etcd/pull/7417 if merged // we're append only, so ensure this unique deploy id doesn't exist - ifs = append(ifs, etcd.Compare(etcd.Version(path), "=", 0)) // KeyMissing - //ifs = append(ifs, etcd.KeyMissing(path)) + //ifs = append(ifs, etcd.Compare(etcd.Version(path), "=", 0)) // KeyMissing + ifs = append(ifs, etcdutil.KeyMissing(path)) // don't look for previous deploy if this is the first deploy ever if id > 1 { // we append sequentially, so ensure previous key *does* exist - prev := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id-1, payloadPath) - ifs = append(ifs, etcd.Compare(etcd.Version(prev), ">", 0)) // KeyExists - //ifs = append(ifs, etcd.KeyExists(prev)) + prev := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id-1, payloadPath) + //ifs = append(ifs, etcd.Compare(etcd.Version(prev), ">", 0)) // KeyExists + ifs = append(ifs, etcdutil.KeyExists(prev)) if hash != "" && pHash != "" { // does the previously stored hash match what we expect? 
- prevHash := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id-1, hashPath) + prevHash := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id-1, hashPath) ifs = append(ifs, etcd.Compare(etcd.Value(prevHash), "=", pHash)) } } @@ -182,7 +203,7 @@ func AddDeploy(obj Client, id uint64, hash, pHash string, data *string) error { // it's important to do this in one transaction, and atomically, because // this way, we only generate one watch event, and only when it's needed - result, err := obj.Txn(ifs, ops, nil) + result, err := obj.Client.Txn(ctx, ifs, ops, nil) if err != nil { return errwrap.Wrapf(err, "error creating deploy id %d", id) } diff --git a/etcd/interfaces.go b/etcd/error.go similarity index 68% rename from etcd/interfaces.go rename to etcd/error.go index 71bf049f..dca8ef87 100644 --- a/etcd/interfaces.go +++ b/etcd/error.go @@ -18,13 +18,10 @@ package etcd import ( - etcd "github.com/coreos/etcd/clientv3" // "clientv3" + "github.com/purpleidea/mgmt/etcd/interfaces" ) -// Client provides a simple interface specification for client requests. Both -// EmbdEtcd and ClientEtcd implement this. -type Client interface { - // TODO: add more method signatures - Get(path string, opts ...etcd.OpOption) (map[string]string, error) - Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) -} +const ( + // errInconsistentApply means applyDeltaEvents wasn't consistent. + errInconsistentApply = interfaces.Error("inconsistent apply") +) diff --git a/etcd/etcd.go b/etcd/etcd.go index 6042781f..9a081906 100644 --- a/etcd/etcd.go +++ b/etcd/etcd.go @@ -15,81 +15,165 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -// TODO: Add TTL's (eg: volunteering) -// TODO: Remove race around leader operations -// TODO: Fix server reuse issue (bind: address already in use) -// TODO: Fix unstarted member -// TODO: Fix excessive StartLoop/FinishLoop -// TODO: Add VIP for servers (incorporate with net resource) -// TODO: Auto assign ports/ip's for peers (if possible) -// TODO: Fix godoc +// TODO: remove race around leader operations +// TODO: fix unstarted member +// TODO: add VIP for servers (incorporate with net resource) +// TODO: auto assign ports/ip's for peers (if possible) +// TODO: check the shutdown ordering, so everything unrolls to a shutdown +// TODO: add the converger Register/Unregister stuff and timers if needed -// Package etcd implements the distributed key value store integration. -// This also takes care of managing and clustering the embedded etcd server. -// The elastic etcd algorithm works in the following way: -// * When you start up mgmt, you can pass it a list of seeds. -// * If no seeds are given, then assume you are the first server and startup. -// * If a seed is given, connect as a client, and optionally volunteer to be a server. -// * All volunteering clients should listen for a message from the master for nomination. -// * If a client has been nominated, it should startup a server. -// * All servers should listen for their nomination to be removed and shutdown if so. -// * The elected leader should decide who to nominate/unnominate to keep the right number of servers. +// Package etcd implements the distributed key value store and fs integration. +// This also takes care of managing and clustering of the embedded etcd server. +// The automatic clustering is considered experimental. 
If you require a more
+// robust, battle-tested etcd cluster, then manage your own, and point each mgmt
+// agent at it with --seeds and --no-server.
 //
-// Smoke testing:
-// mkdir /tmp/mgmt{A..E}
-// ./mgmt run --hostname h1 --tmp-prefix --no-pgp yaml --yaml examples/yaml/etcd1a.yaml
-// ./mgmt run --hostname h2 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 yaml --yaml examples/yaml/etcd1b.yaml
-// ./mgmt run --hostname h3 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 yaml --yaml examples/yaml/etcd1c.yaml
-// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/idealClusterSize 3
-// ./mgmt run --hostname h4 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 yaml --yaml examples/yaml/etcd1d.yaml
-// ./mgmt run --hostname h5 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2387 --server-urls http://127.0.0.1:2388 yaml --yaml examples/yaml/etcd1e.yaml
-// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 member list
-// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 put /_mgmt/idealClusterSize 5
-// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 member list
+// Algorithm
+//
+// The elastic etcd algorithm works in the following way:
+//
+// * When you start up mgmt, you can pass it a list of seeds.
+//
+// * If no seeds are given, then assume you are the first server and start up.
+//
+// * If a seed is given, connect as a client, and volunteer to be a server.
+//
+// * All volunteering clients should listen for a nomination message.
+//
+// * If a client has been nominated, it should start up a server.
+//
+// * A server should shut down if its nomination is removed.
+//
+// * The elected leader should decide who to nominate/unnominate as needed.
+//
+// Notes
+//
+// If you attempt to add a new member to the cluster with a duplicate hostname,
+// then the behaviour is undefined, and you could bork your cluster. This is not
+// recommended or supported. Please ensure that your hostnames are unique.
+//
+// A single ^C requests an orderly shutdown, however a third ^C will ask etcd to
+// shut down forcefully. It is not recommended that you use this option; it
+// exists as a way to make exit easier if something deadlocked the cluster. If
+// this was due to user error (eg: duplicate hostnames) then it was your fault,
+// but if the member did not shut down from a single ^C under normal
+// circumstances, then please file a bug.
+//
+// There are currently some races in this implementation. In practice, this
+// should not cause any adverse effects unless you simultaneously add or remove
+// members at a high rate. Fixing these races will probably require some
+// internal changes to etcd. Help is welcome if you're interested in working on
+// this.
+//
+// Smoke testing
+//
+// Here is a simple way to test etcd clustering basics...
+//
+// ./mgmt run --tmp-prefix --no-pgp --hostname h1 empty
+// ./mgmt run --tmp-prefix --no-pgp --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 empty
+// ./mgmt run --tmp-prefix --no-pgp --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 empty
+// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 3
+// ./mgmt run --tmp-prefix --no-pgp --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 empty
+// ./mgmt run --tmp-prefix --no-pgp --hostname h5 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2387 --server-urls http://127.0.0.1:2388 empty
+// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 member list
+// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 put /_mgmt/chooser/dynamicsize/idealclustersize 5
+// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 member list
+//
+// Bugs
+//
+// A member might occasionally think that an endpoint still exists after it has
+// already shut down. This isn't a major issue, since if that endpoint doesn't
+// respond, then it will automatically choose the next available one. To see
+// this issue, turn on debugging and start: H1, H2, H3, then stop H2, and you
+// might see that H3 still knows about H2.
+//
+// Shutting down a cluster by setting the idealclustersize to zero is currently
+// buggy and not supported. Try this at your own risk.
+//
+// If a member is nominated, and it doesn't respond to the nominate event and
+// start up, and we lost quorum to add it, then we could be in a blocked state.
+// This can be improved upon if we can call memberRemove after a timeout.
+//
+// Adding new cluster members very quickly might trigger a:
+// `runtime error: error validating peerURLs ... member count is unequal` error.
+// See: https://github.com/etcd-io/etcd/issues/10626 for more information.
+//
+// If you use the dynamic size feature to start and stop the server process,
+// once it has already started and then stopped, it can't be re-started because
+// of a bug in etcd that doesn't free the port. Instead you'll get a:
+// `bind: address already in use` error. See:
+// https://github.com/etcd-io/etcd/issues/6042 for more information.
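+//
+// External cluster
+//
+// As mentioned above, you can also skip the embedded server and point every
+// mgmt agent at an etcd cluster that you manage yourself. A sketch of such an
+// invocation (the endpoint here is just an example value) would be:
+//
+//	./mgmt run --tmp-prefix --no-pgp --hostname h1 --no-server --seeds http://127.0.0.1:2379 empty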
package etcd import ( - "bytes" - "errors" + "context" "fmt" - "log" - "math" "net/url" "os" - "path" "sort" - "strconv" "strings" "sync" "time" "github.com/purpleidea/mgmt/converger" - "github.com/purpleidea/mgmt/etcd/event" + "github.com/purpleidea/mgmt/etcd/chooser" + "github.com/purpleidea/mgmt/etcd/client" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util" + "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" // "clientv3" + "github.com/coreos/etcd/clientv3/concurrency" + "github.com/coreos/etcd/clientv3/namespace" "github.com/coreos/etcd/embed" - "github.com/coreos/etcd/etcdserver" - rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" etcdtypes "github.com/coreos/etcd/pkg/types" - raft "github.com/coreos/etcd/raft" - context "golang.org/x/net/context" - "google.golang.org/grpc" ) -// constant parameters which may need to be tweaked or customized const ( - NS = "/_mgmt" // root namespace for mgmt operations - seedSentinel = "_seed" // you must not name your hostname this - MaxStartServerTimeout = 60 // max number of seconds to wait for server to start - MaxStartServerRetries = 3 // number of times to retry starting the etcd server - maxClientConnectRetries = 5 // number of times to retry consecutive connect failures - selfRemoveTimeout = 3 // give unnominated members a chance to self exit - exitDelay = 3 // number of sec of inactivity after exit to clean up - DefaultIdealClusterSize = 5 // default ideal cluster size target for initial seed + // TODO: figure out a trailing slash convention... + // NominatedPath is the unprefixed path under which nominated hosts are + // stored. This is public so that other consumers can know to avoid this + // key prefix. + NominatedPath = "/nominated/" + nominatedPathFmt = NominatedPath + "%s" // takes a hostname on the end + + // VolunteerPath is the unprefixed path under which volunteering hosts + // are stored. This is public so that other consumers can know to avoid + // this key prefix. + VolunteerPath = "/volunteer/" + volunteerPathFmt = VolunteerPath + "%s" // takes a hostname on the end + + // EndpointsPath is the unprefixed path under which the advertised host + // endpoints are stored. This is public so that other consumers can know + // to avoid this key prefix. + EndpointsPath = "/endpoints/" + endpointsPathFmt = EndpointsPath + "%s" // takes a hostname on the end + + // ChooserPath is the unprefixed path under which the chooser algorithm + // may store data. This is public so that other consumers can know to + // avoid this key prefix. + ChooserPath = "/chooser" // all hosts share the same namespace + + // ConvergedPath is the unprefixed path under which the converger + // may store data. This is public so that other consumers can know to + // avoid this key prefix. + ConvergedPath = "/converged/" + convergedPathFmt = ConvergedPath + "%s" // takes a hostname on the end + + // SchedulerPath is the unprefixed path under which the scheduler + // may store data. This is public so that other consumers can know to + // avoid this key prefix. + SchedulerPath = "/scheduler/" + schedulerPathFmt = SchedulerPath + "%s" // takes a namespace on the end + + // DefaultClientURL is the default value that is used for client URLs. + // It is pulled from the upstream etcd package. DefaultClientURL = embed.DefaultListenClientURLs // 127.0.0.1:2379 - DefaultServerURL = embed.DefaultListenPeerURLs // 127.0.0.1:2380 + + // DefaultServerURL is the default value that is used for server URLs. 
+ // It is pulled from the upstream etcd package. + DefaultServerURL = embed.DefaultListenPeerURLs // 127.0.0.1:2380 // DefaultMaxTxnOps is the maximum number of operations to run in a // single etcd transaction. If you exceed this limit, it is possible @@ -98,1777 +182,1247 @@ const ( // know so that we can analyze the situation, and increase this if // necessary. DefaultMaxTxnOps = 512 + + // RunStartupTimeout is the amount of time we will wait for regular run + // startup before cancelling it all. + RunStartupTimeout = 30 * time.Second + + // ClientDialTimeout is the DialTimeout option in the client config. + ClientDialTimeout = 5 * time.Second + + // ClientDialKeepAliveTime is the DialKeepAliveTime config value for the + // etcd client. It is recommended that you use this so that dead + // endpoints don't block any cluster operations. + ClientDialKeepAliveTime = 2 * time.Second // from etcdctl + // ClientDialKeepAliveTimeout is the DialKeepAliveTimeout config value + // for the etcd client. It is recommended that you use this so that dead + // endpoints don't block any cluster operations. + ClientDialKeepAliveTimeout = 6 * time.Second // from etcdctl + + // MemberChangeInterval is the polling interval to use when watching for + // member changes during add or remove. + MemberChangeInterval = 500 * time.Millisecond + + // SelfRemoveTimeout gives unnominated members a chance to self exit. + SelfRemoveTimeout = 10 * time.Second + + // ForceExitTimeout is the amount of time we will wait for a force exit + // to occur before cancelling it all. + ForceExitTimeout = 15 * time.Second + + // SessionTTL is the number of seconds to wait before a dead or + // unresponsive host has their volunteer keys removed from the cluster. + // This should be an integer multiple of seconds, since one second is + // the TTL precision used in etcd. + SessionTTL = 10 * time.Second // seconds + + // RequireLeaderCtx specifies whether the volunteer loop should use the + // WithRequireLeader ctx wrapper. It is unknown at this time if this + // would cause occasional events to be lost, more extensive testing is + // needed. + RequireLeaderCtx = false + + // ConvergerHostnameNamespace is a unique key used in the converger. + ConvergerHostnameNamespace = "etcd-hostname" ) -var ( - errApplyDeltaEventsInconsistent = errors.New("inconsistent key in ApplyDeltaEvents") -) - -// AW is a struct for the AddWatcher queue. -type AW struct { - path string - opts []etcd.OpOption - callback func(*RE) error - errCheck bool - skipConv bool // ask event to skip converger updates - resp event.Resp - cancelFunc func() // data -} - -// RE is a response + error struct since these two values often occur together. -// This is now called an event with the move to the etcd v3 API. -type RE struct { - response etcd.WatchResponse - path string - err error - callback func(*RE) error - errCheck bool // should we check the error of the callback? - skipConv bool // event skips converger updates - retryHint bool // set to true for one event after a watcher failure - retries uint // number of times we've retried on error -} - -// KV is a key + value struct to hold the two items together. -type KV struct { - key string - value string - opts []etcd.OpOption - resp event.Resp -} - -// GQ is a struct for the get queue. -type GQ struct { - path string - skipConv bool - opts []etcd.OpOption - resp event.Resp - data map[string]string -} - -// DL is a struct for the delete queue. 
-type DL struct {
-	path string
-	opts []etcd.OpOption
-	resp event.Resp
-	data int64
-}
-
-// TN is a struct for the txn queue.
-type TN struct {
-	ifcmps  []etcd.Cmp
-	thenops []etcd.Op
-	elseops []etcd.Op
-	resp    event.Resp
-	data    *etcd.TxnResponse
-}
-
-// Flags are some constant flags which are used throughout the program.
-type Flags struct {
-	Debug   bool // add additional log messages
-	Trace   bool // add execution flow log messages
-	Verbose bool // add extra log message output
-}
-
 // EmbdEtcd provides the embedded server and client etcd functionality.
 type EmbdEtcd struct { // EMBeddeD etcd
-	// etcd client connection related
-	cLock  sync.Mutex   // client connect lock
-	rLock  sync.RWMutex // client reconnect lock
-	client *etcd.Client
-	cError error // permanent client error
-	ctxErr error // permanent ctx error
+	Hostname string
 
-	// exit and cleanup related
-	cancelLock sync.Mutex // lock for the cancels list
-	cancels    []func()   // array of every cancel function for watches
-	exiting    bool
-	exitchan   chan struct{}
-	exitchanCb chan struct{}
-	exitwg     *sync.WaitGroup // wait for main loops to shutdown
+	// Seeds is the list of servers that this client could connect to.
+	Seeds etcdtypes.URLs
 
-	hostname            string
-	memberID            uint64            // cluster membership id of server if running
-	endpoints           etcdtypes.URLsMap // map of servers a client could connect to
-	clientURLs          etcdtypes.URLs    // locations to listen for clients if i am a server
-	serverURLs          etcdtypes.URLs    // locations to listen for servers if i am a server (peer)
-	advertiseClientURLs etcdtypes.URLs    // client urls to advertise
-	advertiseServerURLs etcdtypes.URLs    // server urls to advertise
-	noServer            bool              // disable all server peering if true
-	noNetwork           bool              // use unix:// sockets instead of TCP for clients/servers
+	// ClientURLs are the locations to listen for clients if i am a server.
+	ClientURLs etcdtypes.URLs
+	// ServerURLs are the locations to listen for servers (peers) if i am a
+	// server (peer).
+	ServerURLs etcdtypes.URLs
+	// AClientURLs are the client urls to advertise.
+	AClientURLs etcdtypes.URLs
+	// AServerURLs are the server (peer) urls to advertise.
+	AServerURLs etcdtypes.URLs
 
-	// local tracked state
-	nominated        etcdtypes.URLsMap // copy of who's nominated to locally track state
-	lastRevision     int64             // the revision id of message being processed
-	idealClusterSize uint16            // ideal cluster size
+	// NoServer disables all server peering for this host.
+	// TODO: allow changing this at runtime with some function call?
+	NoServer bool
+	// NoNetwork causes this to use unix:// sockets instead of TCP for
+	// connections.
+	NoNetwork bool
 
-	// etcd channels
-	awq     chan *AW // add watch queue
-	wevents chan *RE // response+error
-	setq    chan *KV // set queue
-	getq    chan *GQ // get queue
-	delq    chan *DL // delete queue
-	txnq    chan *TN // txn queue
+	// Chooser is the implementation of the algorithm that decides which
+	// hosts to add or remove to grow and shrink the cluster.
+	Chooser chooser.Chooser
 
-	flags     Flags
-	prefix    string                 // folder prefix to use for misc storage
-	converger *converger.Coordinator // converged tracking
+	// Converger is a converged coordinator object that can be used to
+	// track the converged state.
+ Converger *converger.Coordinator - // etcd server related - serverwg sync.WaitGroup // wait for server to shutdown - server *embed.Etcd // technically this contains the server struct - dataDir string // our data dir, prefix + "etcd" - serverReady chan struct{} // closes when ready + // NS is a string namespace that we prefix to every key operation. + NS string + + // Prefix is the directory where any etcd related state is stored. It + // must be an absolute directory path. + Prefix string + + Debug bool + Logf func(format string, v ...interface{}) + + wg *sync.WaitGroup + exit *util.EasyExit // exit signal + closing bool // are we closing ? + hardexit *util.EasyExit // hard exit signal (to unblock borked things) + + errChan chan error // global error chan, closes when Run is done + + // errExit1 ... errExitN all must get closed for errChan to close. + errExit1 chan struct{} // connect + errExit2 chan struct{} // chooser + errExit3 chan struct{} // nominate + errExit4 chan struct{} // volunteer + errExit5 chan struct{} // endpoints + errExitN chan struct{} // special signal for server closing (starts/stops) + + // coordinate an organized exit so we wait for everyone without blocking + activeExit1 bool + activeExit2 bool + activeExit3 bool + activeExit4 bool + activeExit5 bool + activateExit1 *util.EasyAckOnce + activateExit2 *util.EasyAckOnce + activateExit3 *util.EasyAckOnce + activateExit4 *util.EasyAckOnce + activateExit5 *util.EasyAckOnce + + readySignal chan struct{} // closes when we're up and running + exitsSignal chan struct{} // closes when run exits + + // locally tracked state + + // nominated is a local cache of who's been nominated. This contains + // values for where a *server* would connect to. It gets updated + // primarily in the nominateCb watcher loop. + // TODO: maybe this should just be a list? + // TODO: is there a difference here between ServerURLs and AServerURLs ? + nominated etcdtypes.URLsMap // map[hostname]URLs + + // volunteers is a local cache of who's volunteered. This contains + // values for where a *server* would connect to. It gets updated + // primarily in the volunteerCb watcher loop. + // TODO: maybe this should just be a list? + // TODO: is there a difference here between ServerURLs and AServerURLs ? + volunteers etcdtypes.URLsMap // map[hostname]URLs + + // membermap is a local cache of server endpoints. This contains values + // for where a *server* (peer) would connect to. It gets updated in the + // membership state functions. + membermap etcdtypes.URLsMap // map[hostname]URLs + + // endpoints is a local cache of server endpoints. It differs from the + // config value which is a flattened representation of the same. That + // value can be seen via client.Endpoints() and client.SetEndpoints(). + // This contains values for where a *client* would connect to. It gets + // updated in the membership state functions. + endpoints etcdtypes.URLsMap // map[hostname]URLs + + // memberIDs is a local cache of which cluster servers (peers) are + // associated with each memberID. It gets updated in the membership + // state functions. Note that unstarted members have an ID, but no name + // yet, so they aren't included here, since that key would be the empty + // string. 
+ memberIDs map[string]uint64 // map[hostname]memberID + + // behaviour mutexes + stateMutex *sync.RWMutex // lock around all locally tracked state + orderingMutex *sync.Mutex // lock around non-concurrent changes + nominatedMutex *sync.Mutex // lock around nominatedCb + volunteerMutex *sync.Mutex // lock around volunteerCb + + // client related + etcd *etcd.Client + connectSignal chan struct{} // TODO: use a SubscribedSignal instead? + client *client.Simple // provides useful helper methods + clients []*client.Simple // list of registered clients + session *concurrency.Session // session that expires on disconnect + leaseID etcd.LeaseID // the leaseID used by this session + + // server related + server *embed.Etcd // contains the server struct + serverID uint64 // uint64 because memberRemove uses that + serverwg *sync.WaitGroup // wait for server to shutdown + servermu *sync.Mutex // lock around destroy server + serverExit *util.EasyExit // exit signal + serverReadySignal *util.SubscribedSignal // signals when server is up and running + serverExitsSignal *util.SubscribedSignal // signals when runServer exits + + // task queue state + taskQueue []*task + taskQueueWg *sync.WaitGroup + taskQueueLock *sync.Mutex + taskQueueRunning bool + taskQueueID int } -// NewEmbdEtcd creates the top level embedded etcd struct client and server obj. -func NewEmbdEtcd(hostname string, seeds, clientURLs, serverURLs, advertiseClientURLs, advertiseServerURLs etcdtypes.URLs, noServer bool, noNetwork bool, idealClusterSize uint16, flags Flags, prefix string, converger *converger.Coordinator) *EmbdEtcd { - endpoints := make(etcdtypes.URLsMap) - if hostname == seedSentinel { // safety - return nil - } - if noServer && len(seeds) == 0 { - log.Printf("Etcd: need at least one seed if running with --no-server!") - return nil - } - if noNetwork { - if len(clientURLs) != 0 || len(serverURLs) != 0 || len(seeds) != 0 { - log.Printf("--no-network is mutual exclusive with --seeds, --client-urls and --server-urls") - return nil - } - clientURLs, _ = etcdtypes.NewURLs([]string{"unix://clients.sock:0"}) - serverURLs, _ = etcdtypes.NewURLs([]string{"unix://servers.sock:0"}) - } - - if len(seeds) > 0 { - endpoints[seedSentinel] = seeds - idealClusterSize = 0 // unset, get from running cluster - } - obj := &EmbdEtcd{ - exitchan: make(chan struct{}), // exit signal for main loop - exitchanCb: make(chan struct{}), - exitwg: &sync.WaitGroup{}, - awq: make(chan *AW), - wevents: make(chan *RE), - setq: make(chan *KV), - getq: make(chan *GQ), - delq: make(chan *DL), - txnq: make(chan *TN), - - nominated: make(etcdtypes.URLsMap), - - hostname: hostname, - endpoints: endpoints, - clientURLs: clientURLs, - serverURLs: serverURLs, - advertiseClientURLs: advertiseClientURLs, - advertiseServerURLs: advertiseServerURLs, - noServer: noServer, - noNetwork: noNetwork, - - idealClusterSize: idealClusterSize, - converger: converger, - flags: flags, - prefix: prefix, - dataDir: path.Join(prefix, "etcd"), - serverReady: make(chan struct{}), - } - // TODO: add some sort of auto assign method for picking these defaults - // add a default so that our local client can connect locally if needed - if len(obj.LocalhostClientURLs()) == 0 { // if we don't have any localhost URLs - u, err := url.Parse(DefaultClientURL) - if err != nil { - return nil // TODO: change interface to return an error - } - obj.clientURLs = append([]url.URL{*u}, obj.clientURLs...) // prepend - } - - // add a default for local use and testing, harmless and useful! 
- if !obj.noServer && len(obj.serverURLs) == 0 { - if len(obj.endpoints) > 0 { - obj.noServer = true // we didn't have enough to be a server - } - u, err := url.Parse(DefaultServerURL) // default - if err != nil { - return nil // TODO: change interface to return an error - } - obj.serverURLs = []url.URL{*u} - } - - if converger != nil { - converger.AddStateFn("etcd-hostname", func(converged bool) error { - // send our individual state into etcd for others to see - return SetHostnameConverged(obj, hostname, converged) // TODO: what should happen on error? - }) - } - - return obj +// sessionTTLSec transforms the time representation into the nearest number of +// seconds, which is needed by the etcd API. +func sessionTTLSec(d time.Duration) int { + return int(d.Seconds()) } -// GetClient returns a handle to the raw etcd client object for those scenarios. -func (obj *EmbdEtcd) GetClient() *etcd.Client { - return obj.client -} +// Validate the initial struct. This is called from Init, but can be used if you +// would like to check your configuration is correct. +func (obj *EmbdEtcd) Validate() error { + s := sessionTTLSec(SessionTTL) + if s <= 0 { + return fmt.Errorf("the SessionTTL const of %s (%d sec) must be greater than zero", SessionTTL.String(), s) + } + if s > etcd.MaxLeaseTTL { + return fmt.Errorf("the SessionTTL const of %s (%d sec) must be less than %d sec", SessionTTL.String(), s, etcd.MaxLeaseTTL) + } -// GetConfig returns the config struct to be used for the etcd client connect. -func (obj *EmbdEtcd) GetConfig() etcd.Config { - endpoints := []string{} - // XXX: filter out any urls which wouldn't resolve here ? - for _, eps := range obj.endpoints { // flatten map - for _, u := range eps { - endpoints = append(endpoints, u.String()) // use full url including scheme - } + if obj.Hostname == "" { + return fmt.Errorf("the Hostname was not specified") } - sort.Strings(endpoints) // sort for determinism - cfg := etcd.Config{ - Endpoints: endpoints, - // RetryDialer chooses the next endpoint to use - // it comes with a default dialer if unspecified - DialTimeout: 5 * time.Second, - } - return cfg -} -// Connect connects the client to a server, and then builds the *API structs. -// If reconnect is true, it will force a reconnect with new config endpoints. -func (obj *EmbdEtcd) Connect(reconnect bool) error { - if obj.flags.Debug { - log.Println("Etcd: Connect...") + if obj.NoServer && len(obj.Seeds) == 0 { + return fmt.Errorf("need at least one seed if NoServer is true") } - obj.cLock.Lock() - defer obj.cLock.Unlock() - if obj.cError != nil { // stop on permanent error - return obj.cError - } - if obj.client != nil { // memoize - if reconnect { - // i think this requires the rLock when using it concurrently - err := obj.client.Close() - if err != nil { - log.Printf("Etcd: (Re)Connect: Close: Error: %+v", err) - } - obj.client = nil // for kicks - } else { - return nil + + if !obj.NoServer { // you don't need a Chooser if there's no server... 
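+		// The Chooser is the pluggable strategy that decides which
+		// volunteers get nominated as servers. The methods we rely on
+		// in this file look roughly like this (a sketch; see the
+		// chooser package for the authoritative definition):
+		//
+		//	Validate() error
+		//	Init(*chooser.Data) error
+		//	Connect(context.Context, interfaces.Client) error
+		//	Watch() (chan error, error)
+		//	Disconnect() error
+		//	Close() error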
+ if obj.Chooser == nil { + return fmt.Errorf("need to specify a Chooser implementation") + } + if err := obj.Chooser.Validate(); err != nil { + return errwrap.Wrapf(err, "the Chooser did not validate") } } - var emax uint16 // = 0 - for { // loop until connect - var err error - cfg := obj.GetConfig() - if eps := obj.endpoints; len(eps) > 0 { - log.Printf("Etcd: Connect: Endpoints: %v", eps) - } else { - log.Printf("Etcd: Connect: Endpoints: []") + + if obj.NoNetwork { + if len(obj.Seeds) != 0 || len(obj.ClientURLs) != 0 || len(obj.ServerURLs) != 0 { + return fmt.Errorf("NoNetwork is mutually exclusive with Seeds, ClientURLs and ServerURLs") } - obj.client, err = etcd.New(cfg) // connect! - if err == etcd.ErrNoAvailableEndpoints { - emax++ - if emax > maxClientConnectRetries { - log.Printf("Etcd: The dataDir (%s) might be inconsistent or corrupt.", obj.dataDir) - log.Printf("Etcd: Please see: %s", "https://github.com/purpleidea/mgmt/blob/master/docs/faq.md#what-does-the-error-message-about-an-inconsistent-datadir-mean") - obj.cError = fmt.Errorf("can't find an available endpoint") - return obj.cError - } - err = &CtxDelayErr{time.Duration(emax) * time.Second, "No endpoints available yet!"} // retry with backoff... - } - if err != nil { - log.Printf("Etcd: Connect: CtxError...") - if _, e := obj.CtxError(context.TODO(), err); e != nil { - log.Printf("Etcd: Connect: CtxError: Fatal: %v", e) - obj.cError = e - return e // fatal error - } - continue - } - // check if we're actually connected here, because this must - // block if we're not connected - if obj.client == nil { - log.Printf("Etcd: Connect: Is nil!") - continue - } - break } + + if _, err := copyURLs(obj.Seeds); err != nil { // this will validate + return errwrap.Wrapf(err, "the Seeds are not valid") + } + + if obj.NS == "/" { + return fmt.Errorf("the namespace should be empty instead of /") + } + if strings.HasSuffix(obj.NS, "/") { + return fmt.Errorf("the namespace should not end in /") + } + + if obj.Prefix == "" || obj.Prefix == "/" { + return fmt.Errorf("the prefix of `%s` is invalid", obj.Prefix) + } + + if obj.Logf == nil { + return fmt.Errorf("no Logf function was specified") + } + return nil } -// Startup is the main entry point to kick off the embedded etcd client & server. -func (obj *EmbdEtcd) Startup() error { - bootstrapping := len(obj.endpoints) == 0 // because value changes after start +// Init initializes the struct after it has been populated as desired. You must +// not use the struct if this returns an error. +func (obj *EmbdEtcd) Init() error { + if err := obj.Validate(); err != nil { + return errwrap.Wrapf(err, "validate error") + } - // connect but don't block here, because servers might not be up yet... + if obj.ClientURLs == nil { + obj.ClientURLs = []url.URL{} // initialize + } + if obj.ServerURLs == nil { + obj.ServerURLs = []url.URL{} + } + if obj.AClientURLs == nil { + obj.AClientURLs = []url.URL{} + } + if obj.AServerURLs == nil { + obj.AServerURLs = []url.URL{} + } + + curls, err := obj.curls() + if err != nil { + return err + } + surls, err := obj.surls() + if err != nil { + return err + } + if !obj.NoServer { + // add a default + if len(curls) == 0 { + u, err := url.Parse(DefaultClientURL) + if err != nil { + return err + } + obj.ClientURLs = []url.URL{*u} + } + // add a default for local use and testing, harmless and useful! 
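+		// (The Default*URL constants are expected to use the standard
+		// etcd ports, ie: something like http://127.0.0.1:2379 for
+		// clients and http://127.0.0.1:2380 for peers; check the
+		// constants in this package for the authoritative values.)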
+ if len(surls) == 0 { + u, err := url.Parse(DefaultServerURL) // default + if err != nil { + return err + } + obj.ServerURLs = []url.URL{*u} + } + + // TODO: if we don't have any localhost URLs, should we warn so + // that our local client can be able to connect more easily? + if len(localhostURLs(obj.ClientURLs)) == 0 { + u, err := url.Parse(DefaultClientURL) + if err != nil { + return err + } + obj.ClientURLs = append([]url.URL{*u}, obj.ClientURLs...) // prepend + } + } + + if obj.NoNetwork { + var err error + // FIXME: convince etcd to store these files in our obj.Prefix! + obj.ClientURLs, err = etcdtypes.NewURLs([]string{"unix://clients.sock:0"}) + if err != nil { + return err + } + obj.ServerURLs, err = etcdtypes.NewURLs([]string{"unix://servers.sock:0"}) + if err != nil { + return err + } + } + + if obj.Chooser != nil { + data := &chooser.Data{ + Hostname: obj.Hostname, + Debug: obj.Debug, + Logf: func(format string, v ...interface{}) { + obj.Logf("chooser: "+format, v...) + }, + } + if err := obj.Chooser.Init(data); err != nil { + return errwrap.Wrapf(err, "error initializing chooser") + } + } + + if err := os.MkdirAll(obj.Prefix, 0770); err != nil { + return errwrap.Wrapf(err, "couldn't mkdir: %s", obj.Prefix) + } + + obj.wg = &sync.WaitGroup{} + obj.exit = util.NewEasyExit() + obj.hardexit = util.NewEasyExit() + + obj.errChan = make(chan error) + + obj.errExit1 = make(chan struct{}) + obj.errExit2 = make(chan struct{}) + obj.errExit3 = make(chan struct{}) + obj.errExit4 = make(chan struct{}) + obj.errExit5 = make(chan struct{}) + obj.errExitN = make(chan struct{}) // done before call to runServer! + close(obj.errExitN) // starts closed + + //obj.activeExit1 = false + //obj.activeExit2 = false + //obj.activeExit3 = false + //obj.activeExit4 = false + //obj.activeExit5 = false + obj.activateExit1 = util.NewEasyAckOnce() + obj.activateExit2 = util.NewEasyAckOnce() + obj.activateExit3 = util.NewEasyAckOnce() + obj.activateExit4 = util.NewEasyAckOnce() + obj.activateExit5 = util.NewEasyAckOnce() + + obj.readySignal = make(chan struct{}) + obj.exitsSignal = make(chan struct{}) + + // locally tracked state + obj.nominated = make(etcdtypes.URLsMap) + obj.volunteers = make(etcdtypes.URLsMap) + obj.membermap = make(etcdtypes.URLsMap) + obj.endpoints = make(etcdtypes.URLsMap) + obj.memberIDs = make(map[string]uint64) + + // behaviour mutexes + obj.stateMutex = &sync.RWMutex{} + // TODO: I'm not sure if orderingMutex is actually required or not... + obj.orderingMutex = &sync.Mutex{} + obj.nominatedMutex = &sync.Mutex{} + obj.volunteerMutex = &sync.Mutex{} + + // client related + obj.connectSignal = make(chan struct{}) + obj.clients = []*client.Simple{} + + // server related + obj.serverwg = &sync.WaitGroup{} + obj.servermu = &sync.Mutex{} + obj.serverExit = util.NewEasyExit() // is reset after destroyServer exit + obj.serverReadySignal = &util.SubscribedSignal{} + obj.serverExitsSignal = &util.SubscribedSignal{} + + // task queue state + obj.taskQueue = []*task{} + obj.taskQueueWg = &sync.WaitGroup{} + obj.taskQueueLock = &sync.Mutex{} + + return nil +} + +// Close cleans up after you are done using the struct. +func (obj *EmbdEtcd) Close() error { + var reterr error + + if obj.Chooser != nil { + reterr = errwrap.Append(reterr, obj.Chooser.Close()) + } + + return reterr +} + +// curls returns the client urls that we should use everywhere except for +// locally, where we prefer to use the non-advertised perspective. 
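+// For example, if ClientURLs contains http://127.0.0.1:2379 and AClientURLs
+// contains http://192.0.2.42:2379 (an illustrative, made-up address) then we
+// tell peers about the latter, while dialing the former ourselves.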
+func (obj *EmbdEtcd) curls() (etcdtypes.URLs, error) { + // TODO: do we need the copy? + if len(obj.AClientURLs) > 0 { + return copyURLs(obj.AClientURLs) + } + return copyURLs(obj.ClientURLs) +} + +// surls returns the server (peer) urls that we should use everywhere except for +// locally, where we prefer to use the non-advertised perspective. +func (obj *EmbdEtcd) surls() (etcdtypes.URLs, error) { + // TODO: do we need the copy? + if len(obj.AServerURLs) > 0 { + return copyURLs(obj.AServerURLs) + } + return copyURLs(obj.ServerURLs) +} + +// err is an error helper that sends to the errChan. +func (obj *EmbdEtcd) err(err error) { + select { + case obj.errChan <- err: + } +} + +// Run is the main entry point to kick off the embedded etcd client and server. +// It blocks until we've exited for shutdown. The shutdown can be triggered by +// calling Destroy. +func (obj *EmbdEtcd) Run() error { + curls, err := obj.curls() + if err != nil { + return err + } + surls, err := obj.surls() + if err != nil { + return err + } + + exitCtx := obj.exit.Context() // local exit signal + obj.Logf("running...") + defer obj.Logf("exited!") + wg := &sync.WaitGroup{} + defer wg.Wait() + defer close(obj.exitsSignal) + defer obj.wg.Wait() + defer obj.exit.Done(nil) // unblock anything waiting for exit... + startupCtx, cancel := context.WithTimeout(exitCtx, RunStartupTimeout) + defer cancel() + defer obj.Logf("waiting for exit cleanup...") // TODO: is this useful? + + // After we trigger a hardexit, wait for the ForceExitTimeout and then + // cancel any remaining stuck context's. This helps prevent angry users. + unblockCtx, runTimeout := context.WithCancel(context.Background()) + defer runTimeout() + wg.Add(1) go func() { - if err := obj.Connect(false); err != nil { - log.Printf("Etcd: Startup: Error: %v", err) - // XXX: Now cause Startup() to exit with error somehow! + defer wg.Done() + defer runTimeout() + select { + case <-obj.hardexit.Signal(): // bork unblocker + case <-obj.exitsSignal: + } + + select { + case <-time.After(ForceExitTimeout): + case <-obj.exitsSignal: } }() - go obj.CbLoop() // start callback loop - go obj.Loop() // start main loop + // main loop exit signal + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + // when all the senders on errChan have exited, we can exit too + defer close(obj.errChan) + // these signals guard around the errChan close operation + wg := &sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + // We wait here until we're notified to know whether or + // not this particular exit signal will be relevant... + // This is because during some runs, we might not use + // all of the signals, therefore we don't want to wait + // for them! 
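+			// In other words: each errExitN channel is paired
+			// with an activateExitN ack and an activeExitN bool.
+			// Each branch of Run acks once it knows whether it
+			// will ever close its errExit channel, and only then
+			// do we decide if it's worth blocking on it here.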
+ select { + case <-obj.activateExit1.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit1 { + return + } + select { + case <-obj.errExit1: + if obj.Debug { + obj.Logf("exited connect loop (1)") + } + } + }() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.activateExit2.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit2 { + return + } + select { + case <-obj.errExit2: + if obj.Debug { + obj.Logf("exited chooser loop (2)") + } + } + }() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.activateExit3.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit3 { + return + } + select { + case <-obj.errExit3: + if obj.Debug { + obj.Logf("exited nominate loop (3)") - // TODO: implement native etcd watcher method on member API changes - path := fmt.Sprintf("%s/nominated/", NS) - go obj.AddWatcher(path, obj.nominateCallback, true, false, etcd.WithPrefix()) // no block + } + } + }() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.activateExit4.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit4 { + return + } + select { + case <-obj.errExit4: + if obj.Debug { + obj.Logf("exited volunteer loop (4)") + } + } + }() + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-obj.activateExit5.Wait(): + case <-exitCtx.Done(): + } + if !obj.activeExit5 { + return + } + select { + case <-obj.errExit5: + if obj.Debug { + obj.Logf("exited endpoints loop (5)") + } + } + }() + wg.Wait() // wait for all the other exit signals before this one + select { + case <-obj.errExitN: // last one is for server (it can start/stop) + if obj.Debug { + obj.Logf("exited server loop (0)") + } + } + }() - // setup ideal cluster size watcher - key := fmt.Sprintf("%s/idealClusterSize", NS) - go obj.AddWatcher(key, obj.idealClusterSizeCallback, true, false) // no block + // main loop + var reterr error + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + Loop: + for { + select { + case err, ok := <-obj.errChan: + if !ok { // when this closes, we can shutdown + break Loop + } + if err == nil { + err = fmt.Errorf("unexpected nil error") + } + obj.Logf("runtime error: %+v", err) + if reterr == nil { // happens only once + obj.exit.Done(err) // trigger an exit in Run! + } + reterr = errwrap.Append(reterr, err) + } + } + }() - // if we have no endpoints, it means we are bootstrapping... - if !bootstrapping { - log.Println("Etcd: Startup: Getting initial values...") - if nominated, err := Nominated(obj); err == nil { - obj.nominated = nominated // store a local copy - } else { - log.Printf("Etcd: Startup: Nominate lookup error.") - obj.Destroy() - return fmt.Errorf("Etcd: Startup: Error: %v", err) + bootstrapping := len(obj.Seeds) == 0 // we're the first, start a server! + canServer := !obj.NoServer + + // Opportunistic "connect events" system, so that we can connect + // promiscuously when it's needed, instead of needing to linearize code. + obj.activeExit1 = true // activate errExit1 + obj.activateExit1.Ack() + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + defer close(obj.errExit1) // multi-signal for errChan close op + if bootstrapping { + serverReady, ackReady := obj.ServerReady() // must call ack! + serverExited, ackExited := obj.ServerExited() // must call ack! + select { + case <-serverReady: + ackReady() // must be called + ackExited() // must be called + + case <-serverExited: + ackExited() // must be called + ackReady() // must be called + // send an error in case server doesn't + // TODO: do we want this error to be sent? 
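+				// Here the server exited before it ever became
+				// ready, so surface that as a startup error.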
+ obj.err(fmt.Errorf("server exited early")) + return + + case <-obj.exit.Signal(): // exit early on exit signal + ackReady() // must be called + ackExited() // must be called + return + } } - // get initial ideal cluster size - if idealClusterSize, err := GetClusterSize(obj); err == nil { - obj.idealClusterSize = idealClusterSize - log.Printf("Etcd: Startup: Ideal cluster size is: %d", idealClusterSize) - } else { - // perhaps the first server didn't set it yet. it's ok, - // we can get it from the watcher if it ever gets set! - log.Printf("Etcd: Startup: Ideal cluster size lookup error.") + // Connect here. If we're bootstrapping, the server came up + // right above us. No need to add to our endpoints manually, + // that is done for us in the server start method. + if err := obj.connect(); err != nil { + obj.err(errwrap.Wrapf(err, "error during client connect")) + return } - } - - if !obj.noServer { - path := fmt.Sprintf("%s/volunteers/", NS) - go obj.AddWatcher(path, obj.volunteerCallback, true, false, etcd.WithPrefix()) // no block - } - - // if i am alone and will have to be a server... - if !obj.noServer && bootstrapping { - log.Printf("Etcd: Bootstrapping...") - surls := obj.serverURLs - if len(obj.advertiseServerURLs) > 0 { - surls = obj.advertiseServerURLs + obj.client = client.NewClientFromClient(obj.etcd) + obj.client.Debug = obj.Debug + obj.client.Logf = func(format string, v ...interface{}) { + obj.Logf("client: "+format, v...) } - // give an initial value to the obj.nominate map we keep in sync - // this emulates Nominate(obj, obj.hostname, obj.serverURLs) - obj.nominated[obj.hostname] = surls // initial value - // NOTE: when we are stuck waiting for the server to start up, - // it is probably happening on this call right here... - obj.nominateCallback(nil) // kick this off once - } - - // self volunteer - if !obj.noServer && len(obj.serverURLs) > 0 { - // we run this in a go routine because it blocks waiting for server - surls := obj.serverURLs - if len(obj.advertiseServerURLs) > 0 { - surls = obj.advertiseServerURLs + if err := obj.client.Init(); err != nil { + obj.err(errwrap.Wrapf(err, "error during client init")) + return } - log.Printf("Etcd: Startup: Volunteering...") - go Volunteer(obj, surls) - } + // Build a session for making leases that expire on disconnect! + options := []concurrency.SessionOption{ + concurrency.WithTTL(sessionTTLSec(SessionTTL)), + } + if obj.leaseID > 0 { // in the odd chance we ever do reconnects + options = append(options, concurrency.WithLease(obj.leaseID)) + } + obj.session, err = concurrency.NewSession(obj.etcd, options...) + if err != nil { + obj.err(errwrap.Wrapf(err, "could not create session")) + return + } + obj.leaseID = obj.session.Lease() + + obj.Logf("connected!") + if !bootstrapping { // new clients need an initial state sync... + if err := obj.memberStateFromList(startupCtx); err != nil { + obj.err(errwrap.Wrapf(err, "error during initial state sync")) + return + } + } + close(obj.connectSignal) + }() + defer func() { + if obj.session != nil { + obj.session.Close() // this revokes the lease... 
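+			// Revoking the lease means etcd automatically cleans
+			// up any keys that were attached to our leaseID.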
+ } + + // run cleanup functions in reverse order + for i := len(obj.clients) - 1; i >= 0; i-- { + obj.clients[i].Close() // ignore errs + } + if obj.client != nil { // in case we bailed out early + obj.client.Close() // ignore err, but contains wg.Wait() + } + if obj.etcd == nil { // in case we bailed out early + return + } + obj.disconnect() + obj.Logf("disconnected!") + //close(obj.disconnectSignal) + }() + + obj.Logf("watching chooser...") + chooserChan := make(chan error) + obj.activeExit2 = true // activate errExit2 + obj.activateExit2.Ack() + obj.wg.Add(1) + go func() { + defer obj.wg.Done() + defer close(obj.errExit2) // multi-signal for errChan close op + if obj.Chooser == nil { + return + } + + // wait till we're connected + select { + case <-obj.connectSignal: + case <-exitCtx.Done(): + return // run exited early + } + + p := obj.NS + ChooserPath + c, err := obj.MakeClientFromNamespace(p) + if err != nil { + obj.err(errwrap.Wrapf(err, "error during chooser init")) + return + } + if err := obj.Chooser.Connect(exitCtx, c); err != nil { + obj.err(errwrap.Wrapf(err, "error during chooser connect")) + return + } + + ch, err := obj.Chooser.Watch() + if err != nil { + obj.err(errwrap.Wrapf(err, "error running chooser watch")) + return + } + chooserChan = ch // watch it + }() + defer func() { + if obj.Chooser == nil { + return + } + obj.Chooser.Disconnect() // ignore error if any + }() + + // call this once to start the server so we'll be able to connect if bootstrapping { - if err := SetClusterSize(obj, obj.idealClusterSize); err != nil { - log.Printf("Etcd: Startup: Ideal cluster size storage error.") - obj.Destroy() - return fmt.Errorf("Etcd: Startup: Error: %v", err) + obj.Logf("bootstrapping...") + obj.volunteers[obj.Hostname] = surls // bootstrap this! + obj.nominated[obj.Hostname] = surls + // alternatively we can bootstrap like this if we add more stuff... + //data := bootstrapWatcherData(obj.Hostname, surls) // server urls + //if err := obj.nominateApply(data); err != nil { // fake apply + // return err + //} + // server starts inside here if bootstrapping! + if err := obj.nominateCb(startupCtx); err != nil { + // If while bootstrapping a new server, an existing one + // is running on the same port, then we error this here. + return err + } + + // wait till we're connected + select { + case <-obj.connectSignal: + case <-exitCtx.Done(): + // TODO: should this return an error? + return nil // run exited early + } + + // advertise our new endpoint (comes paired after nominateCb) + if err := obj.advertise(startupCtx, obj.Hostname, curls); err != nil { // endpoints + return errwrap.Wrapf(err, "error with endpoints advertise") + } + + // run to add entry into our public nominated datastructure + // FIXME: this might be redundant, but probably not harmful in + // our bootstrapping process... it will get done in volunteerCb + if err := obj.nominate(startupCtx, obj.Hostname, surls); err != nil { + return errwrap.Wrapf(err, "error nominating self") } } - go obj.AddWatcher(fmt.Sprintf("%s/endpoints/", NS), obj.endpointCallback, true, false, etcd.WithPrefix()) + // If obj.NoServer, then we don't need to start up the nominate watcher, + // unless we're the first server... But we check that both are not true! + if bootstrapping || canServer { + if !bootstrapping && canServer { // wait for client! 
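+			// We can't add the nominee watcher until our client
+			// has connected, so block here until that happens.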
+			select {
+			case <-obj.connectSignal:
+			case <-exitCtx.Done():
+				return nil // just exit
+			}
-		if err := obj.Connect(false); err != nil { // don't exit from this Startup function until connected!
+		ctx, cancel := context.WithCancel(unblockCtx)
+		defer cancel()
+		info, err := obj.client.ComplexWatcher(ctx, obj.NS+NominatedPath, etcd.WithPrefix())
+		if err != nil {
+			obj.activateExit3.Ack()
+			return errwrap.Wrapf(err, "error adding nominated watcher")
+		}
+		obj.Logf("watching nominees...")
+		obj.activeExit3 = true // activate errExit3
+		obj.activateExit3.Ack()
+		obj.wg.Add(1)
+		go func() {
+			defer obj.wg.Done()
+			defer close(obj.errExit3) // multi-signal for errChan close op
+			defer cancel()
+			for {
+				var event *interfaces.WatcherData
+				var ok bool
+				select {
+				case event, ok = <-info.Events:
+					if !ok {
+						return
+					}
+				}
+
+				if err := event.Err; err != nil {
+					obj.err(errwrap.Wrapf(err, "nominated watcher errored"))
+					continue
+				}
+
+				// on the initial created event, we populate...
+				if !bootstrapping && event.Created && len(event.Events) == 0 {
+					obj.Logf("populating nominated list...")
+					nominated, err := obj.getNominated(ctx)
+					if err != nil {
+						obj.err(errwrap.Wrapf(err, "get nominated errored"))
+						continue
+					}
+					obj.nominated = nominated
+
+				} else if err := obj.nominateApply(event); err != nil {
+					obj.err(errwrap.Wrapf(err, "nominate apply errored"))
+					continue
+				}
+
+				// decide the desired state before we change it
+				doStop := obj.serverAction(serverActionStop)
+				doStart := obj.serverAction(serverActionStart)
+
+				// server is running, but it should not be
+				if doStop { // stop?
+					// first, unadvertise client urls
+					// TODO: should this cause destroy server instead? does it already?
+					if err := obj.advertise(ctx, obj.Hostname, nil); err != nil { // remove me
+						obj.err(errwrap.Wrapf(err, "error with endpoints unadvertise"))
+						continue
+					}
+				}
+
+				// runServer gets started in a goroutine here...
+				err := obj.nominateCb(ctx)
+				if obj.Debug {
+					obj.Logf("nominateCb: %+v", err)
+				}
+
+				if doStart { // start?
+					if err := obj.advertise(ctx, obj.Hostname, curls); err != nil { // add one
+						obj.err(errwrap.Wrapf(err, "error with endpoints advertise"))
+						continue
+					}
+				}
+
+				if err == interfaces.ErrShutdown {
+					if obj.Debug {
+						obj.Logf("nominated watcher shutdown")
+					}
+					return
+				}
+				if err == nil {
+					continue
+				}
+				obj.err(errwrap.Wrapf(err, "nominated watcher callback errored"))
+				continue
+			}
+		}()
+		defer func() {
+			// wait for unnominate of self to be seen...
+			select {
+			case <-obj.errExit3:
+			case <-obj.hardexit.Signal(): // bork unblocker
+				obj.Logf("unblocked unnominate signal")
+				// now unblock the server in case it's running!
+				if err := obj.destroyServer(); err != nil { // sync until exited
+					obj.err(errwrap.Wrapf(err, "destroyServer errored"))
+					return
+				}
+			}
+		}()
+		defer func() {
+			// wait for volunteer loop to exit
+			select {
+			case <-obj.errExit4:
+			}
+		}()
+	}
+	obj.activateExit3.Ack()
+
+	// volunteering code (volunteer callback and initial volunteering)
+	if !obj.NoServer && len(obj.ServerURLs) > 0 {
+		ctx, cancel := context.WithCancel(unblockCtx)
+		defer cancel() // cleanup on close...
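+		// Volunteering is how a host announces that it is willing to
+		// run a server. Nomination (above) is the cluster choosing from
+		// among the volunteers; this watcher reacts to both volunteer
+		// changes and chooser events.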
+		info, err := obj.client.ComplexWatcher(ctx, obj.NS+VolunteerPath, etcd.WithPrefix())
+		if err != nil {
+			obj.activateExit4.Ack()
+			return errwrap.Wrapf(err, "error adding volunteer watcher")
+		}
+		unvolunteered := make(chan struct{})
+		obj.Logf("watching volunteers...")
+		obj.wg.Add(1)
+		obj.activeExit4 = true // activate errExit4
+		obj.activateExit4.Ack()
+		go func() {
+			defer obj.wg.Done()
+			defer close(obj.errExit4) // multi-signal for errChan close op
+			for {
+				var event *interfaces.WatcherData
+				var ok bool
+				select {
+				case event, ok = <-info.Events:
+					if !ok {
+						return
+					}
+					if err := event.Err; err != nil {
+						obj.err(errwrap.Wrapf(err, "volunteer watcher errored"))
+						continue
+					}
+
+				case chooserEvent, ok := <-chooserChan:
+					if !ok {
+						obj.Logf("got chooser shutdown...")
+						chooserChan = nil // done here!
+						continue
+					}
+					if chooserEvent != nil {
+						obj.err(errwrap.Wrapf(chooserEvent, "chooser watcher errored"))
+						continue
+					}
+					obj.Logf("got chooser event...")
+					event = nil // pass through the apply...
+					// chooser events should poke volunteerCb
+				}
+
+				_, exists1 := obj.volunteers[obj.Hostname] // before
+
+				// on the initial created event, we populate...
+				if !bootstrapping && event != nil && event.Created && len(event.Events) == 0 {
+					obj.Logf("populating volunteers list...")
+					volunteers, err := obj.getVolunteers(ctx)
+					if err != nil {
+						obj.err(errwrap.Wrapf(err, "get volunteers errored"))
+						continue
+					}
+					// TODO: do we need to add ourself?
+					//_, exists := volunteers[obj.Hostname]
+					//if !exists {
+					//	volunteers[obj.Hostname] = surls
+					//}
+					obj.volunteers = volunteers
+
+				} else if err := obj.volunteerApply(event); event != nil && err != nil {
+					obj.err(errwrap.Wrapf(err, "volunteer apply errored"))
+					continue
+				}
+				_, exists2 := obj.volunteers[obj.Hostname] // after
+
+				err := obj.volunteerCb(ctx)
+				if err == nil {
+					// it was there, and it got removed
+					if exists1 && !exists2 {
+						close(unvolunteered)
+					}
+					continue
+				}
+				obj.err(errwrap.Wrapf(err, "volunteer watcher callback errored"))
+				continue
+			}
+		}()
+		defer func() {
+			// wait for unvolunteer of self to be seen...
+			select {
+			case <-unvolunteered:
+			case <-obj.hardexit.Signal(): // bork unblocker
+				obj.Logf("unblocked unvolunteer signal")
+			}
+		}()
+
+		// self volunteer
+		obj.Logf("volunteering...")
+		surls, err := obj.surls()
+		if err != nil {
+			return err
+		}
+		if err := obj.volunteer(ctx, surls); err != nil {
+			return err
+		}
+		defer obj.volunteer(ctx, nil) // unvolunteer
+		defer obj.Logf("unvolunteering...")
+		defer func() {
+			// Move the leader if I'm it, so that the member remove
+			// chooser operation happens on a different member than
+			// myself. A leaving member should not decide its fate.
+			member, err := obj.moveLeaderSomewhere(ctx)
+			if err != nil {
+				// TODO: use obj.err ?
+				obj.Logf("move leader failed with: %+v", err)
+				return
+			}
+			if member != "" {
+				obj.Logf("moved leader to: %s", member)
+			}
+		}()
+	}
+	obj.activateExit4.Ack()
+
+	// startup endpoints watcher (to learn about other servers)
+	ctx, cancel := context.WithCancel(unblockCtx)
+	defer cancel() // cleanup on close...
+	if err := obj.runEndpoints(ctx); err != nil {
+		obj.activateExit5.Ack()
 		return err
 	}
+	obj.activateExit5.Ack()
+	// We don't set state, we only watch others, so nothing to defer close!
+
+	if obj.Converger != nil {
+		obj.Converger.AddStateFn(ConvergerHostnameNamespace, func(converged bool) error {
+			// send our individual state into etcd for others to see
+			// TODO: what should happen on error?
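+			// Rough flow (assumed): each host publishes its own
+			// converged bool under a converged namespace, so that
+			// a --converged-timeout style exit can watch for
+			// cluster-wide convergence.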
+			return obj.setHostnameConverged(exitCtx, obj.Hostname, converged)
+		})
+		defer obj.Converger.RemoveStateFn(ConvergerHostnameNamespace)
+	}
+
+	// NOTE: Add anything else we want to start up here...
+
+	// If we get all the way down here, *and* we're connected, we're ready!
+	obj.wg.Add(1)
+	go func() {
+		defer obj.wg.Done()
+		select {
+		case <-obj.connectSignal:
+			close(obj.readySignal) // we're ready to be used now...
+		case <-exitCtx.Done():
+		}
+	}()
+
+	select {
+	case <-exitCtx.Done():
+	}
+	obj.closing = true // flag to let nominateCb know we're shutting down...
+	// kick off all the defer()'s....
+	return reterr
+}
+
+// runEndpoints runs the endpoints watcher, which keeps our locally cached
+// endpoints list (and the client's endpoint set) in sync with the cluster.
+func (obj *EmbdEtcd) runEndpoints(ctx context.Context) error {
+	bootstrapping := len(obj.Seeds) == 0
+	select {
+	case <-obj.connectSignal:
+	case <-ctx.Done():
+		return nil // TODO: just exit ?
+	}
+	info, err := obj.client.ComplexWatcher(ctx, obj.NS+EndpointsPath, etcd.WithPrefix())
+	if err != nil {
+		obj.activateExit5.Ack()
+		return errwrap.Wrapf(err, "error adding endpoints watcher")
+	}
+	obj.Logf("watching endpoints...")
+	obj.wg.Add(1)
+	obj.activeExit5 = true // activate errExit5
+	obj.activateExit5.Ack()
+	go func() {
+		defer obj.wg.Done()
+		defer close(obj.errExit5) // multi-signal for errChan close op
+		for {
+			var event *interfaces.WatcherData
+			var ok bool
+			select {
+			case event, ok = <-info.Events:
+				if !ok {
+					return
+				}
+				if err := event.Err; err != nil {
+					obj.err(errwrap.Wrapf(err, "endpoints watcher errored"))
+					continue
+				}
+			}
+
+			// on the initial created event, we populate...
+			if !bootstrapping && event.Created && len(event.Events) == 0 {
+				obj.Logf("populating endpoints list...")
+				endpoints, err := obj.getEndpoints(ctx)
+				if err != nil {
+					obj.err(errwrap.Wrapf(err, "get endpoints errored"))
+					continue
+				}
+				obj.endpoints = endpoints
+				obj.setEndpoints()
+
+			} else if err := obj.endpointApply(event); err != nil {
+				obj.err(errwrap.Wrapf(err, "endpoint apply errored"))
+				continue
+			}
+
+			// there is no endpoint callback necessary
+
+			// TODO: do we need this member state sync?
+			if err := obj.memberStateFromList(ctx); err != nil {
+				obj.err(errwrap.Wrapf(err, "error during endpoint state sync"))
+				continue
+			}
+		}
+	}()
+	return nil
 }

 // Destroy cleans up the entire embedded etcd system. Use DestroyServer if you
 // only want to shutdown the embedded server portion.
 func (obj *EmbdEtcd) Destroy() error {
+	obj.Logf("destroy...")
+	obj.exit.Done(nil) // trigger an exit in Run!

-	// this should also trigger an unnominate, which should cause a shutdown
-	log.Printf("Etcd: Destroy: Unvolunteering...")
-	if err := Volunteer(obj, nil); err != nil { // unvolunteer so we can shutdown...
-		log.Printf("Etcd: Destroy: Error: %v", err) // we have a problem
-	}
+	reterr := obj.exit.Error() // wait for exit signal (block until arrival)

-	obj.serverwg.Wait() // wait for server shutdown signal
-
-	obj.exiting = true // must happen before we run the cancel functions!
-
-	// clean up any watchers which might want to continue
-	obj.cancelLock.Lock() // TODO: do we really need the lock here on exit?
- log.Printf("Etcd: Destroy: Cancelling %d operations...", len(obj.cancels)) - for _, cancelFunc := range obj.cancels { - cancelFunc() - } - obj.cancelLock.Unlock() - - close(obj.exitchan) // cause main loop to exit - close(obj.exitchanCb) - - obj.rLock.Lock() - if obj.client != nil { - obj.client.Close() - } - obj.client = nil - obj.rLock.Unlock() - - // this happens in response to the unnominate callback. not needed here! - //if obj.server != nil { - // return obj.DestroyServer() - //} - obj.exitwg.Wait() - return nil + obj.wg.Wait() + return reterr } -// CtxDelayErr requests a retry in Delta duration. -type CtxDelayErr struct { - Delta time.Duration - Message string -} - -func (obj *CtxDelayErr) Error() string { - return fmt.Sprintf("CtxDelayErr(%v): %s", obj.Delta, obj.Message) -} - -// CtxRetriesErr lets you retry as long as you have retries available. -// TODO: consider combining this with CtxDelayErr -type CtxRetriesErr struct { - Retries uint - Message string -} - -func (obj *CtxRetriesErr) Error() string { - return fmt.Sprintf("CtxRetriesErr(%v): %s", obj.Retries, obj.Message) -} - -// CtxPermanentErr is a permanent failure error to notify about borkage. -type CtxPermanentErr struct { - Message string -} - -func (obj *CtxPermanentErr) Error() string { - return fmt.Sprintf("CtxPermanentErr: %s", obj.Message) -} - -// CtxReconnectErr requests a client reconnect to the new endpoint list. -type CtxReconnectErr struct { - Message string -} - -func (obj *CtxReconnectErr) Error() string { - return fmt.Sprintf("CtxReconnectErr: %s", obj.Message) -} - -// CancelCtx adds a tracked cancel function around an existing context. -func (obj *EmbdEtcd) CancelCtx(ctx context.Context) (context.Context, func()) { - cancelCtx, cancelFunc := context.WithCancel(ctx) - obj.cancelLock.Lock() - obj.cancels = append(obj.cancels, cancelFunc) // not thread-safe, needs lock - obj.cancelLock.Unlock() - return cancelCtx, cancelFunc -} - -// TimeoutCtx adds a tracked cancel function with timeout around an existing context. -func (obj *EmbdEtcd) TimeoutCtx(ctx context.Context, t time.Duration) (context.Context, func()) { - timeoutCtx, cancelFunc := context.WithTimeout(ctx, t) - obj.cancelLock.Lock() - obj.cancels = append(obj.cancels, cancelFunc) // not thread-safe, needs lock - obj.cancelLock.Unlock() - return timeoutCtx, cancelFunc -} - -// CtxError is called whenever there is a connection or other client problem -// that needs to be resolved before we can continue, eg: connection disconnected, -// change of server to connect to, etc... It modifies the context if needed. 
-func (obj *EmbdEtcd) CtxError(ctx context.Context, err error) (context.Context, error) { - if obj.ctxErr != nil { // stop on permanent error - return ctx, obj.ctxErr - } - type ctxKey string // use a non-basic type as ctx key (str can conflict) - const ctxErr ctxKey = "ctxErr" - const ctxIter ctxKey = "ctxIter" - expBackoff := func(tmin, texp, iter, tmax int) time.Duration { - // https://en.wikipedia.org/wiki/Exponential_backoff - // tmin <= texp^iter - 1 <= tmax // TODO: check my math - return time.Duration(math.Min(math.Max(math.Pow(float64(texp), float64(iter))-1.0, float64(tmin)), float64(tmax))) * time.Millisecond - } - var isTimeout bool - var iter int // = 0 - if ctxerr, ok := ctx.Value(ctxErr).(error); ok { - if obj.flags.Debug { - log.Printf("Etcd: CtxError: err(%v), ctxerr(%v)", err, ctxerr) - } - if i, ok := ctx.Value(ctxIter).(int); ok { - iter = i + 1 // load and increment - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Iter: %v", iter) - } - } - isTimeout = err == context.DeadlineExceeded - if obj.flags.Debug { - log.Printf("Etcd: CtxError: isTimeout: %v", isTimeout) - } - if !isTimeout { - iter = 0 // reset timer - } - err = ctxerr // restore error - } else if obj.flags.Debug { - log.Printf("Etcd: CtxError: No value found") - } - ctxHelper := func(tmin, texp, tmax int) context.Context { - t := expBackoff(tmin, texp, iter, tmax) - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Timeout: %v", t) - } - - ctxT, _ := obj.TimeoutCtx(ctx, t) - ctxV := context.WithValue(ctxT, ctxIter, iter) // save iter - ctxF := context.WithValue(ctxV, ctxErr, err) // save err - return ctxF - } - _ = ctxHelper // TODO - - isGrpc := func(e error) bool { // helper function - return grpc.ErrorDesc(err) == e.Error() - } - - if err == nil { - log.Fatal("Etcd: CtxError: Error: Unexpected lack of error!") - } - if obj.exiting { - obj.ctxErr = fmt.Errorf("exit in progress") - return ctx, obj.ctxErr - } - - // happens when we trigger the cancels during reconnect - if err == context.Canceled { - // TODO: do we want to create a fresh ctx here for all cancels? - //ctx = context.Background() - ctx, _ = obj.CancelCtx(ctx) // add a new one - return ctx, nil // we should retry, reconnect probably happened - } - - if delayErr, ok := err.(*CtxDelayErr); ok { // custom delay error - log.Printf("Etcd: CtxError: Reason: %s", delayErr.Error()) - time.Sleep(delayErr.Delta) // sleep the amount of time requested - return ctx, nil - } - - if retriesErr, ok := err.(*CtxRetriesErr); ok { // custom retry error - log.Printf("Etcd: CtxError: Reason: %s", retriesErr.Error()) - if retriesErr.Retries == 0 { - obj.ctxErr = fmt.Errorf("no more retries due to CtxRetriesErr") - return ctx, obj.ctxErr - } - return ctx, nil - } - - if permanentErr, ok := err.(*CtxPermanentErr); ok { // custom permanent error - obj.ctxErr = fmt.Errorf("error due to CtxPermanentErr: %s", permanentErr.Error()) - return ctx, obj.ctxErr // quit - } - - if err == etcd.ErrNoAvailableEndpoints { // etcd server is probably starting up - // TODO: tmin, texp, tmax := 500, 2, 16000 // ms, exp base, ms - // TODO: return ctxHelper(tmin, texp, tmax), nil - log.Printf("Etcd: CtxError: No endpoints available yet!") - time.Sleep(500 * time.Millisecond) // a ctx timeout won't help! - return ctx, nil // passthrough - } - - // etcd server is apparently still starting up... 
- if err == rpctypes.ErrNotCapable { // isGrpc(rpctypes.ErrNotCapable) also matches - log.Printf("Etcd: CtxError: Server is starting up...") - time.Sleep(500 * time.Millisecond) // a ctx timeout won't help! - return ctx, nil // passthrough - } - - if err == grpc.ErrClientConnTimeout { // sometimes caused by "too many colons" misconfiguration - return ctx, fmt.Errorf("misconfiguration: %v", err) // permanent failure? - } - - // this can happen if my client connection shuts down, but without any - // available alternatives. in this case, rotate it off to someone else - reconnectErr, isReconnectErr := err.(*CtxReconnectErr) // custom reconnect error - switch { - case isReconnectErr: - log.Printf("Etcd: CtxError: Reason: %s", reconnectErr.Error()) - fallthrough - case err == raft.ErrStopped: // TODO: does this ever happen? - fallthrough - case err == etcdserver.ErrStopped: // TODO: does this ever happen? - fallthrough - case isGrpc(raft.ErrStopped): - fallthrough - case isGrpc(etcdserver.ErrStopped): - fallthrough - case isGrpc(grpc.ErrClientConnClosing): - - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Error(%T): %+v", err, err) - log.Printf("Etcd: Endpoints are: %v", obj.client.Endpoints()) - log.Printf("Etcd: Client endpoints are: %v", obj.endpoints) - } - - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Locking...") - } - obj.rLock.Lock() - // TODO: should this really be nested inside the other lock? - obj.cancelLock.Lock() - // we need to cancel any WIP connections like Txn()'s and so on - // we run the cancel()'s that are stored up so they don't block - log.Printf("Etcd: CtxError: Cancelling %d operations...", len(obj.cancels)) - for _, cancelFunc := range obj.cancels { - cancelFunc() - } - obj.cancels = []func(){} // reset - obj.cancelLock.Unlock() - - log.Printf("Etcd: CtxError: Reconnecting...") - if err := obj.Connect(true); err != nil { - defer obj.rLock.Unlock() - obj.ctxErr = fmt.Errorf("permanent connect error: %v", err) - return ctx, obj.ctxErr - } - if obj.flags.Debug { - log.Printf("Etcd: CtxError: Unlocking...") - } - obj.rLock.Unlock() - log.Printf("Etcd: CtxError: Reconnected!") - return ctx, nil - } - - // FIXME: we might be one of the members in a two member cluster that - // had the other member crash.. hmmm bork?! - if isGrpc(context.DeadlineExceeded) { - log.Printf("Etcd: CtxError: DeadlineExceeded(%T): %+v", err, err) // TODO - } - - if err == rpctypes.ErrDuplicateKey { - log.Fatalf("Etcd: CtxError: Programming error: %+v", err) - } - - // if you hit this code path here, please report the unmatched error! - log.Printf("Etcd: CtxError: Unknown error(%T): %+v", err, err) - time.Sleep(1 * time.Second) - obj.ctxErr = fmt.Errorf("unknown CtxError") - return ctx, obj.ctxErr -} - -// CbLoop is the loop where callback execution is serialized. -func (obj *EmbdEtcd) CbLoop() { - obj.exitwg.Add(1) - defer obj.exitwg.Done() - cuid := obj.converger.Register() - defer cuid.Unregister() - if e := obj.Connect(false); e != nil { - return // fatal - } - var exitTimeout <-chan time.Time // = nil is implied - // we use this timer because when we ignore un-converge events and loop, - // we reset the ConvergedTimer case statement, ruining the timeout math! - cuid.StartTimer() - for { - ctx := context.Background() // TODO: inherit as input argument? +// Interrupt causes this member to force shutdown. It does not safely wait for +// an ordered shutdown. It is not recommended to use this unless you're borked. 
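+// An illustrative use: a signal handler that calls Destroy on the first ^C to
+// shut down cleanly, and that calls Interrupt on a repeat ^C if that hangs.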
+func (obj *EmbdEtcd) Interrupt() error { + obj.Logf("interrupt...") + wg := &sync.WaitGroup{} + var err error + wg.Add(1) + go func() { + defer wg.Done() + err = obj.Destroy() // set return error + }() + wg.Add(1) + go func() { + defer wg.Done() select { - // etcd watcher event - case re := <-obj.wevents: - if !re.skipConv { // if we want to count it... - cuid.ResetTimer() // activity! - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: CbLoop: Event: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - //re.resp.NACK() // nope! - break - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: CbLoop: rawCallback()") - } - err := rawCallback(ctx, re) - if obj.flags.Trace { - log.Printf("Trace: Etcd: CbLoop: rawCallback(): %v", err) - } - if err == nil { - //re.resp.ACK() // success - break - } - re.retries++ // increment error retry count - if ctx, err = obj.CtxError(ctx, err); err != nil { - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: CbLoop: Event: FinishLoop") - } - - // exit loop signal - case <-obj.exitchanCb: - obj.exitchanCb = nil - log.Println("Etcd: Exiting loop shortly...") - // activate exitTimeout switch which only opens after N - // seconds of inactivity in this select switch, which - // lets everything get bled dry to avoid blocking calls - // which would otherwise block us from exiting cleanly! - exitTimeout = util.TimeAfterOrBlock(exitDelay) - - // exit loop commit - case <-exitTimeout: - log.Println("Etcd: Exiting callback loop!") - cuid.StopTimer() // clean up nicely - return + case <-obj.exit.Signal(): // wait for Destroy to run first } - } -} + obj.hardexit.Done(nil) // trigger a hard exit + }() -// Loop is the main loop where everything is serialized. -func (obj *EmbdEtcd) Loop() { - obj.exitwg.Add(1) // TODO: add these to other go routines? - defer obj.exitwg.Done() - cuid := obj.converger.Register() - defer cuid.Unregister() - if e := obj.Connect(false); e != nil { - return // fatal - } - var exitTimeout <-chan time.Time // = nil is implied - cuid.StartTimer() - for { - ctx := context.Background() // TODO: inherit as input argument? - // priority channel... - select { - case aw := <-obj.awq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: PriorityAW: StartLoop") - } - obj.loopProcessAW(ctx, aw) - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: PriorityAW: FinishLoop") - } - continue // loop to drain the priority channel first! - default: - // passthrough to normal channel - } - - select { - // add watcher - case aw := <-obj.awq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: AW: StartLoop") - } - obj.loopProcessAW(ctx, aw) - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: AW: FinishLoop") - } - - // set kv pair - case kv := <-obj.setq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Set: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - kv.resp.NACK() // nope! - break - } - err := obj.rawSet(ctx, kv) - if err == nil { - kv.resp.ACK() // success - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { // try to reconnect, etc... - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Set: FinishLoop") - } - - // get value - case gq := <-obj.getq: - if !gq.skipConv { - cuid.ResetTimer() // activity! 
- } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Get: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - gq.resp.NACK() // nope! - break - } - data, err := obj.rawGet(ctx, gq) - if err == nil { - gq.data = data // update struct - gq.resp.ACK() // success - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Get: FinishLoop") - } - - // delete value - case dl := <-obj.delq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Delete: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - dl.resp.NACK() // nope! - break - } - data, err := obj.rawDelete(ctx, dl) - if err == nil { - dl.data = data // update struct - dl.resp.ACK() // success - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Delete: FinishLoop") - } - - // run txn - case tn := <-obj.txnq: - cuid.ResetTimer() // activity! - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Txn: StartLoop") - } - for { - if obj.exiting { // the exit signal has been sent! - tn.resp.NACK() // nope! - break - } - data, err := obj.rawTxn(ctx, tn) - if err == nil { - tn.data = data // update struct - tn.resp.ACK() // success - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - break // TODO: it's bad, break or return? - } - } - if obj.flags.Trace { - log.Printf("Trace: Etcd: Loop: Txn: FinishLoop") - } - - // exit loop signal - case <-obj.exitchan: - obj.exitchan = nil - log.Println("Etcd: Exiting loop shortly...") - // activate exitTimeout switch which only opens after N - // seconds of inactivity in this select switch, which - // lets everything get bled dry to avoid blocking calls - // which would otherwise block us from exiting cleanly! - exitTimeout = util.TimeAfterOrBlock(exitDelay) - - // exit loop commit - case <-exitTimeout: - log.Println("Etcd: Exiting loop!") - cuid.StopTimer() // clean up nicely - return - } - } -} - -// loopProcessAW is a helper function to facilitate creating priority channels! -func (obj *EmbdEtcd) loopProcessAW(ctx context.Context, aw *AW) { - for { - if obj.exiting { // the exit signal has been sent! - aw.resp.NACK() // nope! - return - } - // cancelFunc is our data payload - cancelFunc, err := obj.rawAddWatcher(ctx, aw) - if err == nil { - aw.cancelFunc = cancelFunc // update struct - aw.resp.ACK() // success - return - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return // TODO: do something else ? - } - } -} - -// Set queues up a set operation to occur using our mainloop. -func (obj *EmbdEtcd) Set(key, value string, opts ...etcd.OpOption) error { - resp := event.NewResp() - obj.setq <- &KV{key: key, value: value, opts: opts, resp: resp} - if err := resp.Wait(); err != nil { // wait for ack/nack - return fmt.Errorf("Etcd: Set: Probably received an exit: %v", err) - } - return nil -} - -// rawSet actually implements the key set operation. -func (obj *EmbdEtcd) rawSet(ctx context.Context, kv *KV) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawSet()") - } - // key is the full key path - // TODO: should this be : obj.client.KV.Put or obj.client.Put ? - obj.rLock.RLock() // these read locks need to wrap any use of obj.client - response, err := obj.client.KV.Put(ctx, kv.key, kv.value, kv.opts...) 
- obj.rLock.RUnlock() - log.Printf("Etcd: Set(%s): %v", kv.key, response) // w00t... bonus - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawSet(): %v", err) - } + wg.Wait() return err } -// Get performs a get operation and waits for an ACK to continue. -func (obj *EmbdEtcd) Get(path string, opts ...etcd.OpOption) (map[string]string, error) { - return obj.ComplexGet(path, false, opts...) +// Ready returns a channel that closes when we're up and running. This process +// happens when calling Run. If Run is never called, this will never happen. Our +// main startup must be running, and our client must be connected to get here. +func (obj *EmbdEtcd) Ready() <-chan struct{} { return obj.readySignal } + +// Exited returns a channel that closes when we've destroyed. This process +// happens after Run exits. If Run is never called, this will never happen. +func (obj *EmbdEtcd) Exited() <-chan struct{} { return obj.exitsSignal } + +// config returns the config struct to be used during the etcd client connect. +func (obj *EmbdEtcd) config() etcd.Config { + // FIXME: filter out any urls which wouldn't resolve ? + endpoints := fromURLsMapToStringList(obj.endpoints) // flatten map + // We don't need to do any sort of priority sort here, since for initial + // connect we'd be the only one, so it doesn't matter, and subsequent + // changes are made with SetEndpoints, not here, so we never need to + // prioritize our local endpoint. + sort.Strings(endpoints) // sort for determinism + + if len(endpoints) == 0 { // initially, we need to use the defaults... + for _, u := range obj.Seeds { + endpoints = append(endpoints, u.String()) + } + } + // XXX: connect to our local obj.ClientURLs instead of obj.AClientURLs ? + cfg := etcd.Config{ + Endpoints: endpoints, // eg: []string{"http://254.0.0.1:12345"} + // RetryDialer chooses the next endpoint to use, and comes with + // a default dialer if unspecified. + DialTimeout: ClientDialTimeout, + + // I think the keepalive stuff is needed for endpoint health. + DialKeepAliveTime: ClientDialKeepAliveTime, + DialKeepAliveTimeout: ClientDialKeepAliveTimeout, + + // 0 disables auto-sync. By default auto-sync is disabled. + AutoSyncInterval: 0, // we do something equivalent ourselves + } + return cfg } -// ComplexGet performs a get operation and waits for an ACK to continue. It can -// accept more arguments that are useful for the less common operations. -// TODO: perhaps a get should never cause an un-converge ? -func (obj *EmbdEtcd) ComplexGet(path string, skipConv bool, opts ...etcd.OpOption) (map[string]string, error) { - resp := event.NewResp() - gq := &GQ{path: path, skipConv: skipConv, opts: opts, resp: resp, data: nil} - obj.getq <- gq // send - if err := resp.Wait(); err != nil { // wait for ack/nack - return nil, fmt.Errorf("Etcd: Get: Probably received an exit: %v", err) +// connect connects the client to a server. If we are the first peer, then that +// server is itself. +func (obj *EmbdEtcd) connect() error { + obj.Logf("connect...") + if obj.etcd != nil { + return fmt.Errorf("already connected") } - return gq.data, nil + cfg := obj.config() // get config + var err error + obj.etcd, err = etcd.New(cfg) // connect! + return err } -func (obj *EmbdEtcd) rawGet(ctx context.Context, gq *GQ) (result map[string]string, err error) { - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawGet()") +// disconnect closes the etcd connection. 
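+// It errors if we were never connected in the first place.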
+func (obj *EmbdEtcd) disconnect() error {
+	obj.Logf("disconnect...")
+	if obj.etcd == nil {
+		return fmt.Errorf("already disconnected")
 	}
-	obj.rLock.RLock()
-	// TODO: we're checking if this is nil to workaround a nil ptr bug...
-	if obj.client == nil { // bug?
-		obj.rLock.RUnlock()
-		return nil, fmt.Errorf("client is nil")
-	}
-	if obj.client.KV == nil { // bug?
-		obj.rLock.RUnlock()
-		return nil, fmt.Errorf("client.KV is nil")
-	}
-	response, err := obj.client.KV.Get(ctx, gq.path, gq.opts...)
-	obj.rLock.RUnlock()
-	if err != nil || response == nil {
+
+	return obj.etcd.Close()
+}
+
+// MakeClient returns an etcd Client interface that is suitable for basic tasks.
+// Don't run this until the Ready method has acknowledged.
+func (obj *EmbdEtcd) MakeClient() (interfaces.Client, error) {
+	c := client.NewClientFromClient(obj.etcd)
+	if err := c.Init(); err != nil {
 		return nil, err
 	}
-
-	// TODO: write a response.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
-	result = make(map[string]string)
-	for _, x := range response.Kvs {
-		result[bytes.NewBuffer(x.Key).String()] = bytes.NewBuffer(x.Value).String()
-	}
-
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: rawGet(): %v", result)
-	}
-	return
+	obj.clients = append(obj.clients, c) // make sure to clean up after...
+	return c, nil
 }

-// Delete performs a delete operation and waits for an ACK to continue.
-func (obj *EmbdEtcd) Delete(path string, opts ...etcd.OpOption) (int64, error) {
-	resp := event.NewResp()
-	dl := &DL{path: path, opts: opts, resp: resp, data: -1}
-	obj.delq <- dl // send
-	if err := resp.Wait(); err != nil { // wait for ack/nack
-		return -1, fmt.Errorf("Etcd: Delete: Probably received an exit: %v", err)
+// MakeClientFromNamespace returns an etcd Client interface that is suitable for
+// basic tasks and that has a key namespace prefix. Don't run this until the
+// Ready method has acknowledged.
+func (obj *EmbdEtcd) MakeClientFromNamespace(ns string) (interfaces.Client, error) {
+	kv := namespace.NewKV(obj.etcd.KV, ns)
+	w := namespace.NewWatcher(obj.etcd.Watcher, ns)
+	c := client.NewClientFromNamespace(obj.etcd, kv, w)
+	if err := c.Init(); err != nil {
+		return nil, err
 	}
-	return dl.data, nil
-}
-
-func (obj *EmbdEtcd) rawDelete(ctx context.Context, dl *DL) (count int64, err error) {
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: rawDelete()")
-	}
-	count = -1
-	obj.rLock.RLock()
-	response, err := obj.client.KV.Delete(ctx, dl.path, dl.opts...)
-	obj.rLock.RUnlock()
-	if err == nil {
-		count = response.Deleted
-	}
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: rawDelete(): %v", err)
-	}
-	return
-}
-
-// Txn performs a transaction and waits for an ACK to continue.
-func (obj *EmbdEtcd) Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) { - resp := event.NewResp() - tn := &TN{ifcmps: ifcmps, thenops: thenops, elseops: elseops, resp: resp, data: nil} - obj.txnq <- tn // send - if err := resp.Wait(); err != nil { // wait for ack/nack - return nil, fmt.Errorf("Etcd: Txn: Probably received an exit: %v", err) - } - return tn.data, nil -} - -func (obj *EmbdEtcd) rawTxn(ctx context.Context, tn *TN) (*etcd.TxnResponse, error) { - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawTxn()") - } - obj.rLock.RLock() - response, err := obj.client.KV.Txn(ctx).If(tn.ifcmps...).Then(tn.thenops...).Else(tn.elseops...).Commit() - obj.rLock.RUnlock() - if obj.flags.Trace { - log.Printf("Trace: Etcd: rawTxn(): %v, %v", response, err) - } - return response, err -} - -// AddWatcher queues up an add watcher request and returns a cancel function. -// Remember to add the etcd.WithPrefix() option if you want to watch recursively. -func (obj *EmbdEtcd) AddWatcher(path string, callback func(re *RE) error, errCheck bool, skipConv bool, opts ...etcd.OpOption) (func(), error) { - resp := event.NewResp() - awq := &AW{path: path, opts: opts, callback: callback, errCheck: errCheck, skipConv: skipConv, cancelFunc: nil, resp: resp} - obj.awq <- awq // send - if err := resp.Wait(); err != nil { // wait for ack/nack - return nil, fmt.Errorf("Etcd: AddWatcher: Got NACK: %v", err) - } - return awq.cancelFunc, nil -} - -// rawAddWatcher adds a watcher and returns a cancel function to call to end it. -func (obj *EmbdEtcd) rawAddWatcher(ctx context.Context, aw *AW) (func(), error) { - cancelCtx, cancelFunc := obj.CancelCtx(ctx) - go func(ctx context.Context) { - defer cancelFunc() // it's safe to cancelFunc() more than once! - obj.rLock.RLock() - rch := obj.client.Watcher.Watch(ctx, aw.path, aw.opts...) - obj.rLock.RUnlock() - var rev int64 - var useRev = false - var retry, locked bool = false, false - for { - response := <-rch // read - err := response.Err() - isCanceled := response.Canceled || err == context.Canceled - if response.Header.Revision == 0 { // by inspection - if obj.flags.Debug { - log.Printf("Etcd: Watch: Received empty message!") // switched client connection - } - isCanceled = true - } - - if isCanceled { - if obj.exiting { // if not, it could be reconnect - return - } - err = context.Canceled - } - - if err == nil { // watch from latest good revision - rev = response.Header.Revision // TODO: +1 ? - useRev = true - if !locked { - retry = false - } - locked = false - } else { - if obj.flags.Debug { - log.Printf("Etcd: Watch: Error: %v", err) // probably fixable - } - // this new context is the fix for a tricky set - // of bugs which were encountered when re-using - // the existing canceled context! it has state! - ctx = context.Background() // this is critical! - - if ctx, err = obj.CtxError(ctx, err); err != nil { - return // TODO: it's bad, break or return? - } - - // remake it, but add old Rev when valid - opts := []etcd.OpOption{} - if useRev { - opts = append(opts, etcd.WithRev(rev)) - } - opts = append(opts, aw.opts...) - rch = nil - obj.rLock.RLock() - if obj.client == nil { - defer obj.rLock.RUnlock() - return // we're exiting - } - rch = obj.client.Watcher.Watch(ctx, aw.path, opts...) - obj.rLock.RUnlock() - locked = true - retry = true - continue - } - - // the response includes a list of grouped events, each - // of which includes one Kv struct. Send these all in a - // batched group so that they are processed together... 
- obj.wevents <- &RE{response: response, path: aw.path, err: err, callback: aw.callback, errCheck: aw.errCheck, skipConv: aw.skipConv, retryHint: retry} // send event - } - }(cancelCtx) - return cancelFunc, nil -} - -// rawCallback is the companion to AddWatcher which runs the callback processing. -func rawCallback(ctx context.Context, re *RE) error { - var err = re.err // the watch event itself might have had an error - if err == nil { - if callback := re.callback; callback != nil { - // TODO: we could add an async option if needed - // NOTE: the callback must *not* block! - // FIXME: do we need to pass ctx in via *RE, or in the callback signature ? - err = callback(re) // run the callback - if !re.errCheck || err == nil { - return nil - } - } else { - return nil - } - } - return err -} - -// volunteerCallback runs to respond to the volunteer list change events. -// Functionally, it controls the adding and removing of members. -// FIXME: we might need to respond to member change/disconnect/shutdown events, -// see: https://github.com/coreos/etcd/issues/5277 -func (obj *EmbdEtcd) volunteerCallback(re *RE) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: volunteerCallback()") - defer log.Printf("Trace: Etcd: volunteerCallback(): Finished!") - } - if err := obj.Connect(false); err != nil { - log.Printf("Etcd: volunteerCallback(): Connect failed permanently: %v", err) - // permanently fail... - return &CtxPermanentErr{fmt.Sprintf("Etcd: volunteerCallback(): Connect error: %s", err)} - } - var err error - - // FIXME: if this is running in response to our OWN volunteering offer, - // skip doing stuff if we're not a server yet because it's pointless, - // and we might have just lost quorum if we just got nominated! Why the - // lack of quorum is needed to read data in etcd v3 but not in v2 is a - // mystery for now, since in v3 this now blocks! Maybe it's that the - // Maintenance.Status API requires a leader to return? Maybe that's it! - // FIXME: are there any situations where we don't want to short circuit - // here, such as if i'm the last node? - if obj.server == nil { - return nil // if we're not a server, we're not a leader, return - } - - membersMap, err := Members(obj) // map[uint64]string - if err != nil { - return fmt.Errorf("Etcd: Members: Error: %+v", err) - } - members := util.StrMapValuesUint64(membersMap) // get values - log.Printf("Etcd: Members: List: %+v", members) - - // we only do *one* change operation at a time so that the cluster can - // advance safely. we ensure this by returning CtxDelayErr any time an - // operation happens to ensure the function will reschedule itself due - // to the CtxError processing after this callback "fails". This custom - // error is caught by CtxError, and lets us specify a retry delay too! - - // check for unstarted members, since we're currently "unhealthy" - for mID, name := range membersMap { - if name == "" { - // reschedule in one second - // XXX: will the unnominate TTL still happen if we are - // in an unhealthy state? that's what we're waiting for - return &CtxDelayErr{2 * time.Second, fmt.Sprintf("unstarted member, mID: %d", mID)} - } - } - - leader, err := Leader(obj) // XXX: race! - if err != nil { - log.Printf("Etcd: Leader: Error: %+v", err) - return fmt.Errorf("Etcd: Leader: Error: %+v", err) - } - log.Printf("Etcd: Leader: %+v", leader) - if leader != obj.hostname { - log.Printf("Etcd: We are not the leader...") - return nil - } - // i am the leader! 
- - // get the list of available volunteers - volunteersMap, err := Volunteers(obj) - if err != nil { - log.Printf("Etcd: Volunteers: Error: %+v", err) - return fmt.Errorf("Etcd: Volunteers: Error: %+v", err) - } - - volunteers := []string{} // get keys - for k := range volunteersMap { - volunteers = append(volunteers, k) - } - sort.Strings(volunteers) // deterministic order - log.Printf("Etcd: Volunteers: %v", volunteers) - - // unnominate anyone that unvolunteers, so that they can shutdown cleanly - quitters := util.StrFilterElementsInList(volunteers, members) - log.Printf("Etcd: Quitters: %v", quitters) - - // if we're the only member left, just shutdown... - if len(members) == 1 && members[0] == obj.hostname && len(quitters) == 1 && quitters[0] == obj.hostname { - log.Printf("Etcd: Quitters: Shutting down self...") - if err := Nominate(obj, obj.hostname, nil); err != nil { // unnominate myself - return &CtxDelayErr{1 * time.Second, fmt.Sprintf("error shutting down self: %v", err)} - } - return nil - } - - candidates := util.StrFilterElementsInList(members, volunteers) - log.Printf("Etcd: Candidates: %v", candidates) - - // TODO: switch to < 0 so that we can shut the whole cluster down with 0 - if obj.idealClusterSize < 1 { // safety in case value is not ready yet - return &CtxDelayErr{1 * time.Second, "The idealClusterSize is < 1."} // retry in one second - } - - // do we need more members? - if len(candidates) > 0 && len(members)-len(quitters) < int(obj.idealClusterSize) { - chosen := candidates[0] // XXX: use a better picker algorithm - peerURLs := volunteersMap[chosen] // comma separated list of urls - - // NOTE: storing peerURLs when they're already in volunteers/ is - // redundant, but it seems to be necessary for a sane algorithm. - // nominate before we call the API so that members see it first! - Nominate(obj, chosen, peerURLs) - // XXX: add a ttl here, because once we nominate someone, we - // need to give them up to N seconds to start up after we run - // the MemberAdd API because if they don't, in some situations - // such as if we're adding the second node to the cluster, then - // we've lost quorum until a second member joins! If the TTL - // expires, we need to MemberRemove! In this special case, we - // need to forcefully remove the second member if we don't add - // them, because we'll be in a lack of quorum state and unable - // to do anything... As a result, we should always only add ONE - // member at a time! - - log.Printf("Etcd: Member Add: %v", peerURLs) - mresp, err := MemberAdd(obj, peerURLs) - if err != nil { - // on error this function will run again, which is good - // because we need to make sure to run the below parts! - return fmt.Errorf("Etcd: Member Add: Error: %+v", err) - } - log.Printf("Etcd: Member Add: %+v", mresp.Member.PeerURLs) - // return and reschedule to check for unstarted members, etc... - return &CtxDelayErr{1 * time.Second, fmt.Sprintf("Member %s added successfully!", chosen)} // retry asap - - } else if len(quitters) == 0 && len(members) > int(obj.idealClusterSize) { // too many members - for _, kicked := range members { - // don't kick ourself unless we are the only one left... 
- if kicked != obj.hostname || (obj.idealClusterSize == 0 && len(members) == 1) { - quitters = []string{kicked} // XXX: use a better picker algorithm - log.Printf("Etcd: Extras: %v", quitters) - break - } - } - } - - // we must remove them from the members API or it will look like a crash - if lq := len(quitters); lq > 0 { - log.Printf("Etcd: Quitters: Shutting down %d members...", lq) - } - for _, quitter := range quitters { - mID, ok := util.Uint64KeyFromStrInMap(quitter, membersMap) - if !ok { - // programming error - log.Fatalf("Etcd: Member Remove: Error: %v(%v) not in members list!", quitter, mID) - } - Nominate(obj, quitter, nil) // unnominate - // once we issue the above unnominate, that peer will - // shutdown, and this might cause us to loose quorum, - // therefore, let that member remove itself, and then - // double check that it did happen in case delinquent - // TODO: get built-in transactional member Add/Remove - // functionality to avoid a separate nominate list... - if quitter == obj.hostname { // remove in unnominate! - log.Printf("Etcd: Quitters: Removing self...") - continue // TODO: CtxDelayErr ? - } - - log.Printf("Etcd: Waiting %d seconds for %s to self remove...", selfRemoveTimeout, quitter) - time.Sleep(selfRemoveTimeout * time.Second) - // in case the removed member doesn't remove itself, do it! - removed, err := MemberRemove(obj, mID) - if err != nil { - return fmt.Errorf("Etcd: Member Remove: Error: %+v", err) - } - if removed { - log.Printf("Etcd: Member Removed (forced): %v(%v)", quitter, mID) - } - - // Remove the endpoint from our list to avoid blocking - // future MemberList calls which would try and connect - // to a missing endpoint... The endpoints should get - // updated from the member exiting safely if it doesn't - // crash, but if it did and/or since it's a race to see - // if the update event will get seen before we need the - // new data, just do it now anyways, then update the - // endpoint list and trigger a reconnect. - delete(obj.endpoints, quitter) // proactively delete it - obj.endpointCallback(nil) // update! - log.Printf("Member %s (%d) removed successfully!", quitter, mID) - return &CtxReconnectErr{"a member was removed"} // retry asap and update endpoint list - } - - return nil -} - -// nominateCallback runs to respond to the nomination list change events. -// Functionally, it controls the starting and stopping of the server process. -func (obj *EmbdEtcd) nominateCallback(re *RE) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: nominateCallback()") - defer log.Printf("Trace: Etcd: nominateCallback(): Finished!") - } - bootstrapping := len(obj.endpoints) == 0 - var revision int64 // = 0 - if re != nil { - revision = re.response.Header.Revision - } - if !bootstrapping && (re == nil || revision != obj.lastRevision) { - // don't reprocess if we've already processed this message - // this can happen if the callback errors and is re-called - obj.lastRevision = revision - - // if we tried to lookup the nominated members here (in etcd v3) - // this would sometimes block because we would loose the cluster - // leader once the current leader calls the MemberAdd API and it - // steps down trying to form a two host cluster. Instead, we can - // look at the event response data to read the nominated values! 
- //nominated, err = Nominated(obj) // nope, won't always work - // since we only see what has *changed* in the response data, we - // have to keep track of the original state and apply the deltas - // this must be idempotent in case it errors and is called again - // if we're retrying and we get a data format error, it's normal - nominated := obj.nominated - if nominated, err := ApplyDeltaEvents(re, nominated); err == nil { - obj.nominated = nominated - } else if !re.retryHint || err != errApplyDeltaEventsInconsistent { - log.Fatal(err) - } - - } else { - // TODO: should we just use the above delta method for everything? - //nominated, err := Nominated(obj) // just get it - //if err != nil { - // return fmt.Errorf("Etcd: Nominate: Error: %+v", err) - //} - //obj.nominated = nominated // update our local copy - } - if n := obj.nominated; len(n) > 0 { - log.Printf("Etcd: Nominated: %+v", n) - } else { - log.Printf("Etcd: Nominated: []") - } - - // if there are no other peers, we create a new server - _, exists := obj.nominated[obj.hostname] - // FIXME: can we get rid of the len(obj.nominated) == 0 ? - newCluster := len(obj.nominated) == 0 || (len(obj.nominated) == 1 && exists) - if obj.flags.Debug { - log.Printf("Etcd: nominateCallback(): newCluster: %v; exists: %v; obj.server == nil: %t", newCluster, exists, obj.server == nil) - } - // XXX: check if i have actually volunteered first of all... - if obj.server == nil && (newCluster || exists) { - - log.Printf("Etcd: StartServer(newCluster: %t): %+v", newCluster, obj.nominated) - err := obj.StartServer( - newCluster, // newCluster - obj.nominated, // other peer members and urls or empty map - ) - if err != nil { - var retries uint - if re != nil { - retries = re.retries - } - // retry MaxStartServerRetries times, then permanently fail - return &CtxRetriesErr{MaxStartServerRetries - retries, fmt.Sprintf("Etcd: StartServer: Error: %+v", err)} - } - - if len(obj.endpoints) == 0 { - // add server to obj.endpoints list... - addresses := obj.LocalhostClientURLs() - if len(addresses) == 0 { - // probably a programming error... - log.Fatal("Etcd: No valid clientUrls exist!") - } - obj.endpoints[obj.hostname] = addresses // now we have some! - // client connects to one of the obj.endpoints servers... - log.Printf("Etcd: Addresses are: %s", addresses) - - surls := obj.serverURLs - if len(obj.advertiseServerURLs) > 0 { - surls = obj.advertiseServerURLs - } - // XXX: just put this wherever for now so we don't block - // nominate self so "member" list is correct for peers to see - Nominate(obj, obj.hostname, surls) - // XXX: if this fails, where will we retry this part ? - } - - // advertise client urls - if curls := obj.clientURLs; len(curls) > 0 { - if len(obj.advertiseClientURLs) > 0 { - curls = obj.advertiseClientURLs - } - // XXX: don't advertise local addresses! 127.0.0.1:2381 doesn't really help remote hosts - // XXX: but sometimes this is what we want... hmmm how do we decide? filter on callback? - AdvertiseEndpoints(obj, curls) - // XXX: if this fails, where will we retry this part ? - - // force this to remove sentinel before we reconnect... - obj.endpointCallback(nil) - } - - return &CtxReconnectErr{"local server is running"} // trigger reconnect to self - - } else if obj.server != nil && !exists { - // un advertise client urls - AdvertiseEndpoints(obj, nil) - - // i have been un-nominated, remove self and shutdown server! - if len(obj.nominated) != 0 { // don't call if nobody left but me! 
- // this works around: https://github.com/coreos/etcd/issues/5482, - // and it probably makes sense to avoid calling if we're the last - log.Printf("Etcd: Member Remove: Removing self: %v", obj.memberID) - removed, err := MemberRemove(obj, obj.memberID) - if err != nil { - return fmt.Errorf("Etcd: Member Remove: Error: %+v", err) - } - if removed { - log.Printf("Etcd: Member Removed (self): %v(%v)", obj.hostname, obj.memberID) - } - } - - log.Printf("Etcd: DestroyServer...") - obj.DestroyServer() - // TODO: make sure to think about the implications of - // shutting down and potentially intercepting signals - // here after i've removed myself from the nominated! - - // if we are connected to self and other servers exist: trigger - // if any of the obj.clientURLs are in the endpoints list, then - // we are stale. it is not likely that the advertised endpoints - // have been updated because we're still blocking the callback. - stale := false - for key, eps := range obj.endpoints { - if key != obj.hostname && len(eps) > 0 { // other endpoints? - stale = true // only half true so far - break - } - } - - for _, curl := range obj.clientURLs { // these just got shutdown - for _, ep := range obj.client.Endpoints() { - if (curl.Host == ep || curl.String() == ep) && stale { - // add back the sentinel to force update - log.Printf("Etcd: Forcing endpoint callback...") - obj.endpoints[seedSentinel] = nil //etcdtypes.URLs{} - obj.endpointCallback(nil) // update! - return &CtxReconnectErr{"local server has shutdown"} // trigger reconnect - } - } - } - } - return nil -} - -// endpointCallback runs to respond to the endpoint list change events. -func (obj *EmbdEtcd) endpointCallback(re *RE) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: endpointCallback()") - defer log.Printf("Trace: Etcd: endpointCallback(): Finished!") - } - - // if the startup sentinel exists, or delta fails, then get a fresh copy - endpoints := make(etcdtypes.URLsMap, len(obj.endpoints)) - // this would copy the reference: endpoints := obj.endpoints - for k, v := range obj.endpoints { - endpoints[k] = make(etcdtypes.URLs, len(v)) - copy(endpoints[k], v) - } - - // updating - _, exists := endpoints[seedSentinel] - endpoints, err := ApplyDeltaEvents(re, endpoints) - if err != nil || exists { - // TODO: we could also lookup endpoints from the maintenance api - endpoints, err = Endpoints(obj) - if err != nil { - return err - } - } - - // change detection - var changed = false // do we need to update? - if len(obj.endpoints) != len(endpoints) { - changed = true - } - for k, v1 := range obj.endpoints { - if changed { // catches previous statement and inner loop break - break - } - v2, exists := endpoints[k] - if !exists { - changed = true - break - } - if len(v1) != len(v2) { - changed = true - break - } - for i := range v1 { - if v1[i] != v2[i] { - changed = true - break - } - } - } - // is the endpoint list different? - if changed { - obj.endpoints = endpoints // set - if eps := endpoints; len(eps) > 0 { - log.Printf("Etcd: Endpoints: %+v", eps) - } else { - log.Printf("Etcd: Endpoints: []") - } - // can happen if a server drops out for example - return &CtxReconnectErr{"endpoint list changed"} // trigger reconnect with new endpoint list - } - - return nil -} - -// idealClusterSizeCallback runs to respond to the ideal cluster size changes. 
-func (obj *EmbdEtcd) idealClusterSizeCallback(re *RE) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: idealClusterSizeCallback()") - defer log.Printf("Trace: Etcd: idealClusterSizeCallback(): Finished!") - } - path := fmt.Sprintf("%s/idealClusterSize", NS) - for _, event := range re.response.Events { - if key := bytes.NewBuffer(event.Kv.Key).String(); key != path { - continue - } - if event.Type != etcd.EventTypePut { - continue - } - val := bytes.NewBuffer(event.Kv.Value).String() - if val == "" { - continue - } - v, err := strconv.ParseUint(val, 10, 16) - if err != nil { - continue - } - if i := uint16(v); i > 0 { - log.Printf("Etcd: Ideal cluster size is now: %d", i) - obj.idealClusterSize = i - // now, emulate the calling of the volunteerCallback... - go func() { - obj.wevents <- &RE{callback: obj.volunteerCallback, errCheck: true} // send event - }() // don't block - } - } - return nil -} - -// LocalhostClientURLs returns the most localhost like URLs for direct connection. -// This gets clients to talk to the local servers first before searching remotely. -func (obj *EmbdEtcd) LocalhostClientURLs() etcdtypes.URLs { - // look through obj.clientURLs and return the localhost ones - urls := etcdtypes.URLs{} - for _, x := range obj.clientURLs { - // "localhost", ::1 or anything in 127.0.0.0/8 is valid! - if s := x.Host; strings.HasPrefix(s, "localhost") || strings.HasPrefix(s, "127.") || strings.HasPrefix(s, "[::1]") { - urls = append(urls, x) - } - // or local unix domain socket - if x.Scheme == "unix" { - urls = append(urls, x) - } - } - return urls -} - -// StartServer kicks of a new embedded etcd server. -func (obj *EmbdEtcd) StartServer(newCluster bool, peerURLsMap etcdtypes.URLsMap) error { - var err error - memberName := obj.hostname - - err = os.MkdirAll(obj.dataDir, 0770) - if err != nil { - log.Printf("Etcd: StartServer: Couldn't mkdir: %s.", obj.dataDir) - log.Printf("Etcd: StartServer: Mkdir error: %s.", err) - obj.DestroyServer() - return err - } - - // if no peer URLs exist, then starting a server is mostly only for some - // testing, but etcd doesn't allow the value to be empty so we use this! - peerURLs, _ := etcdtypes.NewURLs([]string{"http://localhost:0"}) - if len(obj.serverURLs) > 0 { - peerURLs = obj.serverURLs - } - initialPeerURLsMap := make(etcdtypes.URLsMap) - for k, v := range peerURLsMap { - initialPeerURLsMap[k] = v // copy - } - if _, exists := peerURLsMap[memberName]; !exists { - initialPeerURLsMap[memberName] = peerURLs - } - - aCUrls := obj.clientURLs - if len(obj.advertiseClientURLs) > 0 { - aCUrls = obj.advertiseClientURLs - } - aPUrls := peerURLs - if len(obj.advertiseServerURLs) > 0 { - aPUrls = obj.advertiseServerURLs - } - - // embed etcd - cfg := embed.NewConfig() - cfg.Name = memberName // hostname - cfg.Dir = obj.dataDir - cfg.LCUrls = obj.clientURLs - cfg.LPUrls = peerURLs - cfg.ACUrls = aCUrls - cfg.APUrls = aPUrls - cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305 - cfg.MaxTxnOps = DefaultMaxTxnOps - - cfg.InitialCluster = initialPeerURLsMap.String() // including myself! - if newCluster { - cfg.ClusterState = embed.ClusterStateFlagNew - } else { - cfg.ClusterState = embed.ClusterStateFlagExisting - } - //cfg.ForceNewCluster = newCluster // TODO: ? 
- - log.Printf("Etcd: StartServer: Starting server...") - obj.server, err = embed.StartEtcd(cfg) - if err != nil { - return err - } - select { - case <-obj.server.Server.ReadyNotify(): // we hang here if things are bad - log.Printf("Etcd: StartServer: Done starting server!") // it didn't hang! - case <-time.After(time.Duration(MaxStartServerTimeout) * time.Second): - e := fmt.Errorf("timeout of %d seconds reached", MaxStartServerTimeout) - log.Printf("Etcd: StartServer: %s", e.Error()) - obj.server.Server.Stop() // trigger a shutdown - obj.serverwg.Add(1) // add for the DestroyServer() - obj.DestroyServer() - return e - // TODO: should we wait for this notification elsewhere? - case <-obj.server.Server.StopNotify(): // it's going down now... - e := fmt.Errorf("received stop notification") - log.Printf("Etcd: StartServer: %s", e.Error()) - obj.server.Server.Stop() // trigger a shutdown - obj.serverwg.Add(1) // add for the DestroyServer() - obj.DestroyServer() - return e - } - //log.Fatal(<-obj.server.Err()) XXX - log.Printf("Etcd: StartServer: Server running...") - obj.memberID = uint64(obj.server.Server.ID()) // store member id for internal use - close(obj.serverReady) // send a signal - - obj.serverwg.Add(1) - return nil -} - -// ServerReady returns on a channel when the server has started successfully. -func (obj *EmbdEtcd) ServerReady() <-chan struct{} { return obj.serverReady } - -// DestroyServer shuts down the embedded etcd server portion. -func (obj *EmbdEtcd) DestroyServer() error { - var err error - log.Printf("Etcd: DestroyServer: Destroying...") - if obj.server != nil { - obj.server.Close() // this blocks until server has stopped - } - log.Printf("Etcd: DestroyServer: Done closing...") - - obj.memberID = 0 - if obj.server == nil { // skip the .Done() below because we didn't .Add(1) it. - return err - } - obj.server = nil // important because this is used as an isRunning flag - log.Printf("Etcd: DestroyServer: Unlocking server...") - obj.serverReady = make(chan struct{}) // reset the signal - obj.serverwg.Done() // -1 - return err -} - -//func UrlRemoveScheme(urls etcdtypes.URLs) []string { -// strs := []string{} -// for _, u := range urls { -// strs = append(strs, u.Host) // remove http:// prefix -// } -// return strs -//} - -// ApplyDeltaEvents modifies a URLsMap with the deltas from a WatchResponse. 
-func ApplyDeltaEvents(re *RE, urlsmap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) { - if re == nil { // passthrough - return urlsmap, nil - } - for _, event := range re.response.Events { - key := bytes.NewBuffer(event.Kv.Key).String() - key = key[len(re.path):] // remove path prefix - log.Printf("Etcd: ApplyDeltaEvents: Event(%s): %s", event.Type.String(), key) - - switch event.Type { - case etcd.EventTypePut: - val := bytes.NewBuffer(event.Kv.Value).String() - if val == "" { - return nil, fmt.Errorf("value in ApplyDeltaEvents is empty") - } - urls, err := etcdtypes.NewURLs(strings.Split(val, ",")) - if err != nil { - return nil, fmt.Errorf("format error in ApplyDeltaEvents: %v", err) - } - urlsmap[key] = urls // add to map - - // expiry cases are seen as delete in v3 for now - //case etcd.EventTypeExpire: // doesn't exist right now - // fallthrough - case etcd.EventTypeDelete: - if _, exists := urlsmap[key]; !exists { - // this can happen if we retry an operation b/w - // a reconnect so ignore if we are reconnecting - log.Printf("Etcd: ApplyDeltaEvents: Inconsistent key: %v", key) - return nil, errApplyDeltaEventsInconsistent - } - delete(urlsmap, key) - - default: - return nil, fmt.Errorf("unknown event in ApplyDeltaEvents: %+v", event.Type) - } - } - return urlsmap, nil + obj.clients = append(obj.clients, c) // make sure to clean up after... + return c, nil } diff --git a/etcd/etcd_test.go b/etcd/etcd_test.go index 8a794461..30b41b0d 100644 --- a/etcd/etcd_test.go +++ b/etcd/etcd_test.go @@ -21,31 +21,19 @@ package etcd import ( "testing" - - etcdtypes "github.com/coreos/etcd/pkg/types" ) -func TestNewEmbdEtcd(t *testing.T) { - // should return a new etcd object - - noServer := false - var flags Flags - - obj := NewEmbdEtcd("", nil, nil, nil, nil, nil, noServer, false, 0, flags, "", nil) - if obj == nil { - t.Fatal("failed to create server object") - } -} - -func TestNewEmbdEtcdConfigValidation(t *testing.T) { - // running --no-server with no --seeds specified should fail early - - seeds := make(etcdtypes.URLs, 0) - noServer := true - var flags Flags - - obj := NewEmbdEtcd("", seeds, nil, nil, nil, nil, noServer, false, 0, flags, "", nil) - if obj != nil { - t.Fatal("server initialization should fail on invalid configuration") +func TestValidation1(t *testing.T) { + // running --no-server with no --seeds should not validate at the moment + embdEtcd := &EmbdEtcd{ + //Seeds: etcdtypes.URLs{}, + NoServer: true, + } + if err := embdEtcd.Validate(); err == nil { + t.Errorf("expected validation err, got nil") + } + if err := embdEtcd.Init(); err == nil { + t.Errorf("expected init err, got nil") + defer embdEtcd.Close() } } diff --git a/etcd/event/event.go b/etcd/event/event.go deleted file mode 100644 index 4601d2c3..00000000 --- a/etcd/event/event.go +++ /dev/null @@ -1,70 +0,0 @@ -// Mgmt -// Copyright (C) 2013-2019+ James Shubin and the project contributors -// Written by James Shubin and the project contributors -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -// Package event provides some primitives that are used for message passing. -package event - -import ( - "fmt" -) - -// Resp is a channel to be used for boolean responses. A nil represents an ACK, -// and a non-nil represents a NACK (false). This also lets us use custom errors. -type Resp chan error - -// NewResp is just a helper to return the right type of response channel. -func NewResp() Resp { - resp := make(chan error) - return resp -} - -// ACK sends a true value to resp. -func (resp Resp) ACK() { - if resp != nil { - resp <- nil // TODO: close instead? - } -} - -// NACK sends a false value to resp. -func (resp Resp) NACK() { - if resp != nil { - resp <- fmt.Errorf("NACK") - } -} - -// ACKNACK sends a custom ACK or NACK. The ACK value is always nil, the NACK can -// be any non-nil error value. -func (resp Resp) ACKNACK(err error) { - if resp != nil { - resp <- err - } -} - -// Wait waits for any response from a Resp channel and returns it. -func (resp Resp) Wait() error { - return <-resp -} - -// ACKWait waits for a +ive Ack from a Resp channel. -func (resp Resp) ACKWait() { - for { - // wait until true value - if resp.Wait() == nil { - return - } - } -} diff --git a/etcd/fs/file.go b/etcd/fs/file.go index 22d48d3a..0ef637bc 100644 --- a/etcd/fs/file.go +++ b/etcd/fs/file.go @@ -22,7 +22,6 @@ import ( "encoding/gob" "fmt" "io" - "log" "os" "path" "strings" @@ -32,6 +31,7 @@ import ( "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" // "clientv3" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" ) func init() { @@ -263,10 +263,8 @@ func (obj *File) Sync() error { p := obj.path() // store file data at this path in etcd - // TODO: use https://github.com/coreos/etcd/pull/7417 if merged - cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing - //cmp := etcd.KeyMissing(p)) - + //cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing + cmp := etcdutil.KeyMissing(p) op := etcd.OpPut(p, string(obj.data)) // this pushes contents to server // it's important to do this in one transaction, and atomically, because @@ -277,7 +275,7 @@ func (obj *File) Sync() error { } if !result.Succeeded { if obj.fs.Debug { - log.Printf("debug: data already exists in storage") + obj.fs.Logf("debug: data already exists in storage") } } diff --git a/etcd/fs/fs.go b/etcd/fs/fs.go index 2ece4389..1f9a4dd1 100644 --- a/etcd/fs/fs.go +++ b/etcd/fs/fs.go @@ -20,6 +20,7 @@ package fs import ( "bytes" + "context" "crypto/sha256" "encoding/gob" "encoding/hex" @@ -27,19 +28,18 @@ import ( "fmt" "hash" "io" - "log" "os" "path" "strings" "syscall" "time" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/util/errwrap" etcd "github.com/coreos/etcd/clientv3" // "clientv3" rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" "github.com/spf13/afero" - context "golang.org/x/net/context" ) func init() { @@ -91,7 +91,7 @@ var ( // XXX: this is harder because we need to list of *all* metadata paths, if we // want them to be able to share storage backends. 
(we do) type Fs struct { - Client *etcd.Client + Client interfaces.Client Metadata string // location of "superblock" for this filesystem @@ -99,6 +99,7 @@ type Fs struct { Hash string // eg: sha256 Debug bool + Logf func(format string, v ...interface{}) sb *superBlock mounted bool @@ -115,7 +116,7 @@ type superBlock struct { // NewEtcdFs creates a new filesystem handle on an etcd client connection. You // must specify the metadata string that you wish to use. -func NewEtcdFs(client *etcd.Client, metadata string) afero.Fs { +func NewEtcdFs(client interfaces.Client, metadata string) afero.Fs { return &Fs{ Client: client, Metadata: metadata, @@ -127,23 +128,26 @@ func (obj *Fs) get(path string, opts ...etcd.OpOption) (map[string][]byte, error ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout) resp, err := obj.Client.Get(ctx, path, opts...) cancel() - if err != nil || resp == nil { + if err != nil { return nil, err } + if resp == nil { + return nil, fmt.Errorf("empty response") + } - // TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse - result := make(map[string][]byte) // formerly: map[string][]byte - for _, x := range resp.Kvs { - result[string(x.Key)] = x.Value // formerly: bytes.NewBuffer(x.Value).String() + // FIXME: just return resp instead if it was map[string]string? + result := make(map[string][]byte) + for key, val := range resp { + result[key] = []byte(val) // wasteful transform } return result, nil } // put a value into etcd. -func (obj *Fs) put(path string, data []byte, opts ...etcd.OpOption) error { +func (obj *Fs) set(path string, data []byte, opts ...etcd.OpOption) error { ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout) - _, err := obj.Client.Put(ctx, path, string(data), opts...) // TODO: obj.Client.KV ? + err := obj.Client.Set(ctx, path, string(data), opts...) cancel() if err != nil { switch err { @@ -163,7 +167,7 @@ func (obj *Fs) put(path string, data []byte, opts ...etcd.OpOption) error { // txn runs a txn in etcd. func (obj *Fs) txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) { ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout) - resp, err := obj.Client.Txn(ctx).If(ifcmps...).Then(thenops...).Else(elseops...).Commit() + resp, err := obj.Client.Txn(ctx, ifcmps, thenops, elseops) cancel() return resp, err } @@ -194,7 +198,7 @@ func (obj *Fs) sync() error { return errwrap.Wrapf(err, "gob failed to encode") } //base64.StdEncoding.EncodeToString(b.Bytes()) - return obj.put(obj.Metadata, b.Bytes()) + return obj.set(obj.Metadata, b.Bytes()) } // mount downloads the initial cache of metadata, including the *file tree. 
@@ -213,7 +217,7 @@ func (obj *Fs) mount() error { } if result == nil || len(result) == 0 { // nothing found, create the fs if obj.Debug { - log.Printf("debug: mount: creating new fs at: %s", obj.Metadata) + obj.Logf("mount: creating new fs at: %s", obj.Metadata) } // trim any trailing slashes from DataPrefix for strings.HasSuffix(obj.DataPrefix, "/") { @@ -248,7 +252,7 @@ func (obj *Fs) mount() error { } if obj.Debug { - log.Printf("debug: mount: opening old fs at: %s", obj.Metadata) + obj.Logf("mount: opening old fs at: %s", obj.Metadata) } sb, exists := result[obj.Metadata] if !exists { diff --git a/etcd/fs/fs_test.go b/etcd/fs/fs_test.go index fd864063..41885215 100644 --- a/etcd/fs/fs_test.go +++ b/etcd/fs/fs_test.go @@ -26,7 +26,7 @@ import ( "syscall" "testing" - "github.com/purpleidea/mgmt/etcd" + "github.com/purpleidea/mgmt/etcd/client" etcdfs "github.com/purpleidea/mgmt/etcd/fs" "github.com/purpleidea/mgmt/integration" "github.com/purpleidea/mgmt/util" @@ -41,6 +41,7 @@ import ( const ( umask = 0666 superblock = "/some/superblock" // TODO: generate randomly per test? + ns = "/_mgmt/test" // must not end with a slash! ) // Ensure that etcdfs.Fs implements afero.Fs. @@ -79,20 +80,26 @@ func TestFs1(t *testing.T) { } defer stopEtcd() // ignore the error - etcdClient := &etcd.ClientEtcd{ - Seeds: []string{"localhost:2379"}, // endpoints + logf := func(format string, v ...interface{}) { + t.Logf("test: etcd: fs: "+format, v...) } + etcdClient := client.NewClientFromSeedsNamespace( + []string{"localhost:2379"}, // endpoints + ns, + ) - if err := etcdClient.Connect(); err != nil { + if err := etcdClient.Init(); err != nil { t.Errorf("client connection error: %+v", err) return } - defer etcdClient.Destroy() + defer etcdClient.Close() etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, Metadata: superblock, DataPrefix: etcdfs.DefaultDataPrefix, + + Logf: logf, } //var etcdFs afero.Fs = NewEtcdFs() @@ -193,20 +200,26 @@ func TestFs2(t *testing.T) { } defer stopEtcd() // ignore the error - etcdClient := &etcd.ClientEtcd{ - Seeds: []string{"localhost:2379"}, // endpoints + logf := func(format string, v ...interface{}) { + t.Logf("test: etcd: fs: "+format, v...) } + etcdClient := client.NewClientFromSeedsNamespace( + []string{"localhost:2379"}, // endpoints + ns, + ) - if err := etcdClient.Connect(); err != nil { + if err := etcdClient.Init(); err != nil { t.Errorf("client connection error: %+v", err) return } - defer etcdClient.Destroy() + defer etcdClient.Close() etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, Metadata: superblock, DataPrefix: etcdfs.DefaultDataPrefix, + + Logf: logf, } tree, err := util.FsTree(etcdFs, "/") @@ -246,20 +259,26 @@ func TestFs3(t *testing.T) { } defer stopEtcd() // ignore the error - etcdClient := &etcd.ClientEtcd{ - Seeds: []string{"localhost:2379"}, // endpoints + logf := func(format string, v ...interface{}) { + t.Logf("test: etcd: fs: "+format, v...) 
} + etcdClient := client.NewClientFromSeedsNamespace( + []string{"localhost:2379"}, // endpoints + ns, + ) - if err := etcdClient.Connect(); err != nil { + if err := etcdClient.Init(); err != nil { t.Errorf("client connection error: %+v", err) return } - defer etcdClient.Destroy() + defer etcdClient.Close() etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, Metadata: superblock, DataPrefix: etcdfs.DefaultDataPrefix, + + Logf: logf, } if err := etcdFs.Mkdir("/tmp", umask); err != nil { @@ -371,18 +390,19 @@ func TestEtcdCopyFs0(t *testing.T) { } defer stopEtcd() // ignore the error - etcdClient := &etcd.ClientEtcd{ - Seeds: []string{"localhost:2379"}, // endpoints - } + etcdClient := client.NewClientFromSeedsNamespace( + []string{"localhost:2379"}, // endpoints + ns, + ) - if err := etcdClient.Connect(); err != nil { + if err := etcdClient.Init(); err != nil { t.Errorf("client connection error: %+v", err) return } - defer etcdClient.Destroy() + defer etcdClient.Close() etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, Metadata: superblock, DataPrefix: etcdfs.DefaultDataPrefix, } diff --git a/etcd/helpers.go b/etcd/helpers.go new file mode 100644 index 00000000..38ee43fc --- /dev/null +++ b/etcd/helpers.go @@ -0,0 +1,160 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "context" + "fmt" + "sort" + "strings" + + "github.com/purpleidea/mgmt/etcd/interfaces" + "github.com/purpleidea/mgmt/util" + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" + "github.com/coreos/etcd/mvcc/mvccpb" + etcdtypes "github.com/coreos/etcd/pkg/types" // generated package +) + +// setEndpoints sets the endpoints on the etcd client if it exists. It +// prioritizes local endpoints for performance, and so that if a remote endpoint +// disconnects we aren't affected. +func (obj *EmbdEtcd) setEndpoints() { + if obj.etcd == nil { // if client doesn't exist, skip! + return + } + + eps := fromURLsMapToStringList(obj.endpoints) // get flat list + sort.Strings(eps) // sort for determinism + + curls, _ := obj.curls() // ignore error, was already validated + + // prio sort so we connect locally first + urls := fromURLsToStringList(curls) + headFn := func(x string) bool { + return !util.StrInList(x, urls) + } + eps = util.PriorityStrSliceSort(eps, headFn) + if obj.Debug { + obj.Logf("set endpoints to: %+v", eps) + } + // trigger reconnect with new endpoint list + // XXX: When a client switches endpoints, do the watches continue from + // where they last were or do they restart? Add rev restart if needed. + obj.etcd.SetEndpoints(eps...) // no error to check +} + +// ConnectBlock runs a command as soon as the client is connected. 
When the
+// command completes, it closes the output channel. If any error occurred, it
+// is sent on that channel before it closes.
+func (obj *EmbdEtcd) ConnectBlock(ctx context.Context, fn func(context.Context) error) <-chan error {
+	ch := make(chan error)
+	obj.wg.Add(1)
+	go func() {
+		defer obj.wg.Done()
+		defer close(ch)
+		select {
+		case <-obj.connectSignal: // the client is connected!
+		case <-ctx.Done():
+			return
+		}
+		if fn == nil {
+			return
+		}
+		if err := fn(ctx); err != nil {
+			select {
+			case ch <- err:
+			case <-ctx.Done():
+			}
+		}
+	}()
+	return ch
+}
+
+// bootstrapWatcherData returns a minimal WatcherData struct to simulate an
+// initial event for bootstrapping the nominateCb before we've started up.
+func bootstrapWatcherData(hostname string, urls etcdtypes.URLs) *interfaces.WatcherData {
+	return &interfaces.WatcherData{
+		Created: true, // add this flag to hint that we're bootstrapping
+
+		Header: pb.ResponseHeader{}, // not needed
+		Events: []*etcd.Event{
+			{
+				Type: mvccpb.PUT, // or mvccpb.DELETE
+				Kv: &mvccpb.KeyValue{
+					Key:   []byte(hostname),
+					Value: []byte(urls.String()),
+				},
+			},
+		},
+	}
+}
+
+// applyDeltaEvents applies the WatchResponse deltas to a URLsMap and returns a
+// modified copy. A nil data is treated as a passthrough.
+func applyDeltaEvents(data *interfaces.WatcherData, urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
+	out, err := copyURLsMap(urlsMap)
+	if err != nil {
+		return nil, err
+	}
+	if data == nil { // passthrough (check this before reading data.Err!)
+		return out, nil
+	}
+	if err := data.Err; err != nil {
+		return nil, errwrap.Wrapf(err, "data contains an error")
+	}
+	var reterr error
+	for _, event := range data.Events {
+		key := string(event.Kv.Key)
+		key = key[len(data.Path):] // remove path prefix
+		//obj.Logf("applyDeltaEvents: Event(%s): %s", event.Type.String(), key)
+
+		switch event.Type {
+		case etcd.EventTypePut:
+			val := string(event.Kv.Value)
+			if val == "" {
+				return nil, fmt.Errorf("value is empty")
+			}
+			urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
+			if err != nil {
+				return nil, errwrap.Wrapf(err, "format error")
+			}
+			out[key] = urls // add to the copy
+
+		// expiry cases are seen as delete in v3 for now
+		//case etcd.EventTypeExpire: // doesn't exist right now
+		//	fallthrough
+		case etcd.EventTypeDelete:
+			if _, exists := out[key]; exists {
+				delete(out, key)
+				continue
+			}
+
+			// this can happen if we retry an operation between a
+			// reconnect, so ignore in case we are reconnecting...
+			reterr = errInconsistentApply // key not found
+			// keep applying in case this is ignored
+
+		default:
+			return nil, fmt.Errorf("unknown event: %v", event.Type)
+		}
+	}
+	return out, reterr
+}
diff --git a/etcd/interfaces/client.go b/etcd/interfaces/client.go
new file mode 100644
index 00000000..8d15e1e8
--- /dev/null
+++ b/etcd/interfaces/client.go
@@ -0,0 +1,63 @@
+// Mgmt
+// Copyright (C) 2013-2019+ James Shubin and the project contributors
+// Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package interfaces + +import ( + "context" + + etcd "github.com/coreos/etcd/clientv3" // "clientv3" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" +) + +// WatcherData is the structure of data passed to a callback from any watcher. +type WatcherData struct { + // Created is true if this event is the initial event sent on startup. + Created bool + + // XXX: what goes here... this? or a more processed version? + Path string // the path we're watching + Header pb.ResponseHeader + Events []*etcd.Event + Err error +} + +// WatcherInfo is what is returned from a Watcher. It contains everything you +// might need to get information about the running watch. +type WatcherInfo struct { + // Cancel must be called to shutdown the Watcher when we are done with + // it. You can alternatively call cancel on the input ctx. + Cancel func() + + // Events returns a channel of any events that occur. This happens on + // watch startup, watch event, and watch failure. This channel closes + // when the Watcher shuts down. If you block on these reads, then you + // will block the entire Watcher which is usually not what you want. + Events <-chan *WatcherData +} + +// Client provides a simple interface specification for client requests. Both +// EmbdEtcd.MakeClient and client.Simple implement this. +type Client interface { + GetClient() *etcd.Client + Set(ctx context.Context, key, value string, opts ...etcd.OpOption) error + Get(ctx context.Context, path string, opts ...etcd.OpOption) (map[string]string, error) + Del(ctx context.Context, path string, opts ...etcd.OpOption) (int64, error) + Txn(ctx context.Context, ifCmps []etcd.Cmp, thenOps, elseOps []etcd.Op) (*etcd.TxnResponse, error) + Watcher(ctx context.Context, path string, opts ...etcd.OpOption) (chan error, error) + ComplexWatcher(ctx context.Context, path string, opts ...etcd.OpOption) (*WatcherInfo, error) +} diff --git a/etcd/interfaces/error.go b/etcd/interfaces/error.go new file mode 100644 index 00000000..71d2c997 --- /dev/null +++ b/etcd/interfaces/error.go @@ -0,0 +1,33 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package interfaces + +// Error is a constant error type that implements error. +type Error string + +// Error fulfills the error interface of this type. +func (e Error) Error() string { return string(e) } + +const ( + // ErrNotExist is returned when GetStr or friends can not find the + // requested key. + ErrNotExist = Error("ErrNotExist") + + // ErrShutdown is returned when we're exiting during a shutdown. 
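+	// Since Error is a plain constant string type, callers can compare
+	// against it directly, e.g. err == ErrShutdown.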
+ ErrShutdown = Error("ErrShutdown") +) diff --git a/etcd/membership.go b/etcd/membership.go new file mode 100644 index 00000000..7a7bcfa1 --- /dev/null +++ b/etcd/membership.go @@ -0,0 +1,314 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "context" + "fmt" + "net/url" + "sort" + "time" + + "github.com/purpleidea/mgmt/util/errwrap" + + etcd "github.com/coreos/etcd/clientv3" + rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" + etcdtypes "github.com/coreos/etcd/pkg/types" +) + +// addSelfState is used to populate the initial state when I am adding myself. +func (obj *EmbdEtcd) addSelfState() { + surls, _ := obj.surls() // validated on init + curls, _ := obj.curls() // validated on init + obj.membermap[obj.Hostname] = surls + obj.endpoints[obj.Hostname] = curls + obj.memberIDs[obj.Hostname] = obj.serverID +} + +// addMemberState adds the specific member state to our local caches. +func (obj *EmbdEtcd) addMemberState(member string, id uint64, surls, curls etcdtypes.URLs) { + obj.stateMutex.Lock() + defer obj.stateMutex.Unlock() + if surls != nil { + obj.membermap[member] = surls + } + if curls != nil { // TODO: && len(curls) > 0 ? + obj.endpoints[member] = curls + } + obj.memberIDs[member] = id +} + +// rmMemberState removes the state of a given member. +func (obj *EmbdEtcd) rmMemberState(member string) { + obj.stateMutex.Lock() + defer obj.stateMutex.Unlock() + delete(obj.membermap, member) // proactively delete it + delete(obj.endpoints, member) // proactively delete it + delete(obj.memberIDs, member) // proactively delete it +} + +// updateMemberState updates some of our local state whenever we get new +// information from a response. +// TODO: ideally this would be []*etcd.Member but the types are inconsistent... +// TODO: is it worth computing a delta to see if we need to change this? 
+func (obj *EmbdEtcd) updateMemberState(members []*pb.Member) error {
+	//nominated := make(etcdtypes.URLsMap)
+	//volunteers := make(etcdtypes.URLsMap)
+	membermap := make(etcdtypes.URLsMap) // map[hostname]URLs
+	endpoints := make(etcdtypes.URLsMap) // map[hostname]URLs
+	memberIDs := make(map[string]uint64) // map[hostname]memberID
+
+	// URLs is etcdtypes.URLs is []url.URL
+	for _, member := range members {
+		// member.ID // uint64
+		// member.Name // string (hostname)
+		// member.PeerURLs // []string (URLs)
+		// member.ClientURLs // []string (URLs)
+
+		if member.Name == "" { // not started yet
+			continue
+		}
+
+		// []string -> etcdtypes.URLs
+		purls, err := etcdtypes.NewURLs(member.PeerURLs)
+		if err != nil {
+			return err
+		}
+		curls, err := etcdtypes.NewURLs(member.ClientURLs)
+		if err != nil {
+			return err
+		}
+
+		//nominated[member.Name] = member.PeerURLs
+		//volunteers[member.Name] = member.PeerURLs
+		membermap[member.Name] = purls
+		endpoints[member.Name] = curls
+		memberIDs[member.Name] = member.ID
+	}
+
+	// set
+	obj.stateMutex.Lock()
+	defer obj.stateMutex.Unlock()
+	// can't set these two, because we only have a partial knowledge of them
+	//obj.nominated = nominated // can't get this information (partial)
+	//obj.volunteers = volunteers // can't get this information (partial)
+	obj.membermap = membermap
+	obj.endpoints = endpoints
+	obj.memberIDs = memberIDs
+
+	return nil
+}
+
+// memberList returns the current list of server peer members in the cluster.
+func (obj *EmbdEtcd) memberList(ctx context.Context) (*etcd.MemberListResponse, error) {
+	return obj.etcd.MemberList(ctx)
+}
+
+// memberAdd adds a member to the cluster.
+func (obj *EmbdEtcd) memberAdd(ctx context.Context, peerURLs etcdtypes.URLs) (*etcd.MemberAddResponse, error) {
+	resp, err := obj.etcd.MemberAdd(ctx, peerURLs.StringSlice())
+	if err == rpctypes.ErrPeerURLExist { // commonly seen at startup
+		return nil, nil
+	}
+	if err == rpctypes.ErrMemberExist { // not seen yet, but plan for it
+		return nil, nil
+	}
+	return resp, err
+}
+
+// memberRemove removes a member by ID. It can return a nil response with a nil
+// error: that happens when the member was already gone, since the call can run
+// without error even though the member wasn't found. If a value of zero is
+// used, then it will try to remove itself in an idempotent way, by using our
+// own server ID, which is only set when we're actually running a server.
+func (obj *EmbdEtcd) memberRemove(ctx context.Context, memberID uint64) (*etcd.MemberRemoveResponse, error) {
+	if memberID == 0 {
+		// copy value to avoid it changing part way through
+		memberID = obj.serverID
+	}
+	if memberID == 0 {
+		return nil, fmt.Errorf("can't remove memberID of zero")
+	}
+
+	resp, err := obj.etcd.MemberRemove(ctx, memberID)
+	if err == rpctypes.ErrMemberNotFound {
+		// if we get this, member already shut itself down :)
+		return nil, nil // unchanged, mask this error
+	}
+
+	return resp, err // changed
+}
+
+// memberChange polls the member list API and runs a function on each iteration.
+// If that function returns nil, then it closes the output channel to signal an
+// event. Between iterations, it sleeps for a given interval. Since this polls
+// and doesn't watch events, it could miss changes if they happen rapidly. It
+// does not send results on the channel, since results could be captured in the
+// fn callback. It will send an error on the channel if something goes wrong.
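+//
+// A hedged usage sketch (illustrative only, not part of this change): a caller
+// that polls until the cluster reports at least three started members.
+//
+//	fn := func(members []*pb.Member) error {
+//		if len(members) >= 3 {
+//			return nil // done: this closes the channel
+//		}
+//		return fmt.Errorf("only %d members so far", len(members))
+//	}
+//	ch, err := obj.memberChange(ctx, fn, 5*time.Second)
+//	if err != nil {
+//		return err
+//	}
+//	if err, ok := <-ch; ok && err != nil {
+//		return err // the member list API errored
+//	}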
+// TODO: https://github.com/coreos/etcd/issues/5277 +func (obj *EmbdEtcd) memberChange(ctx context.Context, fn func([]*pb.Member) error, d time.Duration) (chan error, error) { + ch := make(chan error) + go func() { + defer close(ch) + for { + resp, err := obj.etcd.MemberList(ctx) + if err != nil { + select { + case ch <- err: // send error + case <-ctx.Done(): + } + return + } + result := fn(resp.Members) + if result == nil { // done! + return + } + select { + case <-time.After(d): // sleep before retry + // pass + case <-ctx.Done(): + return + } + } + }() + return ch, nil +} + +// memberStateFromList does a member list, and applies the state to our cache. +func (obj *EmbdEtcd) memberStateFromList(ctx context.Context) error { + resp, err := obj.memberList(ctx) + if err != nil { + return err + } + if resp == nil { + return fmt.Errorf("empty response") + } + reterr := obj.updateMemberState(resp.Members) + if reterr == nil { + obj.setEndpoints() // sync client with new endpoints + } + return reterr +} + +// isLeader returns true if I'm the leader from the first sane perspective (pov) +// that I can arbitrarily pick. +func (obj *EmbdEtcd) isLeader(ctx context.Context) (bool, error) { + if obj.server == nil { + return false, nil // if i'm not a server, i'm not a leader, return + } + + var ep, backup *url.URL + if len(obj.ClientURLs) > 0 { + // heuristic, but probably correct + addresses := localhostURLs(obj.ClientURLs) + if len(addresses) > 0 { + ep = &addresses[0] // arbitrarily pick the first one + } + backup = &obj.ClientURLs[0] // backup + } + if ep == nil && len(obj.AClientURLs) > 0 { + addresses := localhostURLs(obj.AClientURLs) + if len(addresses) > 0 { + ep = &addresses[0] + } + backup = &obj.AClientURLs[0] // backup + } + if ep == nil { + ep = backup + } + if ep == nil { // programming error? + return false, fmt.Errorf("no available endpoints") + } + + // Ask for one perspective... + // TODO: are we supposed to use ep.Host instead? + resp, err := obj.etcd.Maintenance.Status(ctx, ep.String()) // this perspective + if err != nil { + return false, err + } + if resp == nil { + return false, fmt.Errorf("empty response") + } + if resp.Leader != obj.serverID { // i am not the leader + return false, nil + } + + return true, nil +} + +// moveLeaderSomewhere tries to transfer the leader to the alphanumerically +// lowest member if the caller is the current leader. This contains races. If it +// succeeds, it returns the member hostname that it transferred to. If it can't +// transfer, but doesn't error, it returns an empty string. Any error condition +// returns an error. +func (obj *EmbdEtcd) moveLeaderSomewhere(ctx context.Context) (string, error) { + //if isLeader, err := obj.isLeader(ctx); err != nil { // race! + // return "", errwrap.Wrapf(err, "error determining leader") + //} else if !isLeader { + // if obj.Debug { + // obj.Logf("we are not the leader...") + // } + // return "", nil + //} + // assume i am the leader! + + memberList, err := obj.memberList(ctx) + if err != nil { + return "", err + } + + var transfereeID uint64 + m := make(map[string]uint64) + names := []string{} + for _, x := range memberList.Members { + m[x.Name] = x.ID + if x.Name != obj.Hostname { + names = append(names, x.Name) + } + } + if len(names) == 0 { + return "", nil // can't transfer to self, last remaining host + } + if len(names) == 1 && names[0] == obj.Hostname { // does this happen? 
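+		// NOTE: names is built above by skipping our own hostname, so
+		// this branch should be unreachable; keep it as a safety net.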
+ return "", nil // can't transfer to self + } + sort.Strings(names) + if len(names) > 0 { + // transfer to alphanumerically lowest ID for consistency... + transfereeID = m[names[0]] + } + + if transfereeID == 0 { // safety + return "", fmt.Errorf("got memberID of zero") + } + if transfereeID == obj.serverID { + return "", nil // can't transfer to self + } + + // do the move + if _, err := obj.etcd.MoveLeader(ctx, transfereeID); err == rpctypes.ErrNotLeader { + if obj.Debug { + obj.Logf("we are not the leader...") + } + return "", nil // we are not the leader + } else if err != nil { + return "", errwrap.Wrapf(err, "error moving leader") + } + return names[0], nil +} diff --git a/etcd/methods.go b/etcd/methods.go index 3563f4f8..94ecf65d 100644 --- a/etcd/methods.go +++ b/etcd/methods.go @@ -18,394 +18,220 @@ package etcd import ( + "context" "fmt" - "log" - "strconv" "strings" + "github.com/purpleidea/mgmt/util/errwrap" + etcd "github.com/coreos/etcd/clientv3" - rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + etcdutil "github.com/coreos/etcd/clientv3/clientv3util" etcdtypes "github.com/coreos/etcd/pkg/types" - context "golang.org/x/net/context" ) -// TODO: Could all these Etcd*(obj *EmbdEtcd, ...) functions which deal with the -// interface between etcd paths and behaviour be grouped into a single struct ? - -// Nominate nominates a particular client to be a server (peer). -func Nominate(obj *EmbdEtcd, hostname string, urls etcdtypes.URLs) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: Nominate(%v): %v", hostname, urls.String()) - defer log.Printf("Trace: Etcd: Nominate(%v): Finished!", hostname) - } - // nominate someone to be a server - nominate := fmt.Sprintf("%s/nominated/%s", NS, hostname) - ops := []etcd.Op{} // list of ops in this txn - if urls != nil { - ops = append(ops, etcd.OpPut(nominate, urls.String())) // TODO: add a TTL? (etcd.WithLease) - - } else { // delete message if set to erase - ops = append(ops, etcd.OpDelete(nominate)) - } - - if _, err := obj.Txn(nil, ops, nil); err != nil { - return fmt.Errorf("nominate failed") // exit in progress? - } - return nil -} - -// Nominated returns a urls map of nominated etcd server volunteers. -// NOTE: I know 'nominees' might be more correct, but is less consistent here -func Nominated(obj *EmbdEtcd) (etcdtypes.URLsMap, error) { - path := fmt.Sprintf("%s/nominated/", NS) - keyMap, err := obj.Get(path, etcd.WithPrefix()) // map[string]string, bool - if err != nil { - return nil, fmt.Errorf("nominated isn't available: %v", err) - } - nominated := make(etcdtypes.URLsMap) - for key, val := range keyMap { // loop through directory of nominated - if !strings.HasPrefix(key, path) { - continue +// volunteer offers yourself up to be a server if needed. If you specify a nil +// value for urls, then this will unvolunteer yourself. 
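+//
+// A hedged usage sketch (illustrative only; it assumes surls returns our
+// already validated server URLs, as it does elsewhere in this package):
+//
+//	urls, err := obj.surls()
+//	if err != nil {
+//		return err
+//	}
+//	if err := obj.volunteer(ctx, urls); err != nil { // offer to serve
+//		return err
+//	}
+//	defer obj.volunteer(ctx, nil) // unvolunteer on the way out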
+func (obj *EmbdEtcd) volunteer(ctx context.Context, urls etcdtypes.URLs) error {
+	if obj.Debug {
+		if urls == nil {
+			obj.Logf("unvolunteer...")
+			defer obj.Logf("unvolunteer: done!")
+		} else {
+			obj.Logf("volunteer: %s", urls.String())
+			defer obj.Logf("volunteer: done!")
 		}
-		name := key[len(path):] // get name of nominee
-		if val == "" { // skip "erased" values
-			continue
-		}
-		urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
-		if err != nil {
-			return nil, fmt.Errorf("nominated data format error: %v", err)
-		}
-		nominated[name] = urls // add to map
-		if obj.flags.Debug {
-			log.Printf("Etcd: Nominated(%v): %v", name, val)
-		}
-	}
-	return nominated, nil
-}
-
-// Volunteer offers yourself up to be a server if needed.
-func Volunteer(obj *EmbdEtcd, urls etcdtypes.URLs) error {
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: Volunteer(%v): %v", obj.hostname, urls.String())
-		defer log.Printf("Trace: Etcd: Volunteer(%v): Finished!", obj.hostname)
 	}
 	// volunteer to be a server
-	volunteer := fmt.Sprintf("%s/volunteers/%s", NS, obj.hostname)
-	ops := []etcd.Op{} // list of ops in this txn
+	key := fmt.Sprintf(obj.NS+volunteerPathFmt, obj.Hostname)
+	ifs := []etcd.Cmp{} // list matching the desired state
+	ops := []etcd.Op{}  // list of ops in this txn
+	els := []etcd.Op{}
 	if urls != nil {
-		// XXX: adding a TTL is crucial! (i think)
-		ops = append(ops, etcd.OpPut(volunteer, urls.String())) // value is usually a peer "serverURL"
+		data := urls.String() // value is usually a peer "serverURL"
+		// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
+		// XXX: reverse things with els to workaround the bug :(
+		//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
+		//ops = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
+		ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
+		ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
+		els = append(els, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
 	} else { // delete message if set to erase
-		ops = append(ops, etcd.OpDelete(volunteer))
+		ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
+		ops = append(ops, etcd.OpDelete(key))
 	}
 
-	if _, err := obj.Txn(nil, ops, nil); err != nil {
-		return fmt.Errorf("volunteering failed") // exit in progress?
+	_, err := obj.client.Txn(ctx, ifs, ops, els)
+	msg := "volunteering failed"
+	if urls == nil {
+		msg = "unvolunteering failed"
 	}
-	return nil
+	return errwrap.Wrapf(err, msg)
 }
 
-// Volunteers returns a urls map of available etcd server volunteers.
-func Volunteers(obj *EmbdEtcd) (etcdtypes.URLsMap, error) {
-	if obj.flags.Trace {
-		log.Printf("Trace: Etcd: Volunteers()")
-		defer log.Printf("Trace: Etcd: Volunteers(): Finished!")
+// nominate nominates a particular client to be a server (peer). If you specify
+// a nil value for urls, then this will unnominate that member.
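+//
+// Like volunteer above and advertise below, this compiles to a single atomic
+// etcd transaction of the form If(cmps...) Then(ops...) Else(els...), so
+// repeated calls with identical urls are idempotent. An illustrative call:
+//
+//	urls, _ := etcdtypes.NewURLs([]string{"http://192.168.13.42:2380"})
+//	if err := etcdObj.nominate(ctx, "h2", urls); err != nil { // nominate h2
+//		return err
+//	}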
+func (obj *EmbdEtcd) nominate(ctx context.Context, hostname string, urls etcdtypes.URLs) error {
+	if obj.Debug {
+		if urls == nil {
+			obj.Logf("unnominate(%s)...", hostname)
+			defer obj.Logf("unnominate(%s): done!", hostname)
+		} else {
+			obj.Logf("nominate(%s): %s", hostname, urls.String())
+			defer obj.Logf("nominate(%s): done!", hostname)
+		}
 	}
-	path := fmt.Sprintf("%s/volunteers/", NS)
-	keyMap, err := obj.Get(path, etcd.WithPrefix())
+	// nominate someone to be a server
+	key := fmt.Sprintf(obj.NS+nominatedPathFmt, hostname)
+	ifs := []etcd.Cmp{} // list matching the desired state
+	ops := []etcd.Op{}  // list of ops in this txn
+	els := []etcd.Op{}
+	if urls != nil {
+		data := urls.String()
+		// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
+		// XXX: reverse things with els to workaround the bug :(
+		//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
+		//ops = append(ops, etcd.OpPut(key, data)) // TODO: add a TTL? (etcd.WithLease)
+		ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
+		els = append(els, etcd.OpPut(key, data)) // TODO: add a TTL? (etcd.WithLease)
+
+	} else { // delete message if set to erase
+		ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
+		ops = append(ops, etcd.OpDelete(key))
+	}
+
+	_, err := obj.client.Txn(ctx, ifs, ops, els)
+	msg := "nominate failed"
+	if urls == nil {
+		msg = "unnominate failed"
+	}
+	return errwrap.Wrapf(err, msg)
+}
+
+// advertise idempotently advertises the list of available client endpoints for
+// the given member. If you specify a nil value for urls, then this will remove
+// that member.
+func (obj *EmbdEtcd) advertise(ctx context.Context, hostname string, urls etcdtypes.URLs) error {
+	if obj.Debug {
+		if urls == nil {
+			obj.Logf("unadvertise(%s)...", hostname)
+			defer obj.Logf("unadvertise(%s): done!", hostname)
+		} else {
+			obj.Logf("advertise(%s): %s", hostname, urls.String())
+			defer obj.Logf("advertise(%s): done!", hostname)
+		}
+	}
+	// advertise endpoints
+	key := fmt.Sprintf(obj.NS+endpointsPathFmt, hostname)
+	ifs := []etcd.Cmp{} // list matching the desired state
+	ops := []etcd.Op{}  // list of ops in this txn
+	els := []etcd.Op{}
+	if urls != nil {
+		data := urls.String() // value is usually a "clientURL"
+		// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
+		// XXX: reverse things with els to workaround the bug :(
+		//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
+		//ops = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
+		ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
+		ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
+		els = append(els, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
+	} else { // delete in this case
+		ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
+		ops = append(ops, etcd.OpDelete(key))
+	}
+
+	_, err := obj.client.Txn(ctx, ifs, ops, els)
+	msg := "advertising failed"
+	if urls == nil {
+		msg = "unadvertising failed"
+	}
+	return errwrap.Wrapf(err, msg)
+}
+
+// getVolunteers returns a urls map of available etcd server volunteers.
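+//
+// The returned map is keyed by hostname, with the peer URLs as values, eg
+// (illustrative values only):
+//
+//	map[h1:[http://127.0.0.1:2380] h2:[http://127.0.0.1:2382]]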
+func (obj *EmbdEtcd) getVolunteers(ctx context.Context) (etcdtypes.URLsMap, error) { + if obj.Debug { + obj.Logf("getVolunteers()") + defer obj.Logf("getVolunteers(): done!") + } + p := obj.NS + VolunteerPath + keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix()) if err != nil { - return nil, fmt.Errorf("volunteers aren't available: %v", err) + return nil, errwrap.Wrapf(err, "can't get peer volunteers") } volunteers := make(etcdtypes.URLsMap) for key, val := range keyMap { // loop through directory of volunteers - if !strings.HasPrefix(key, path) { + if !strings.HasPrefix(key, p) { continue } - name := key[len(path):] // get name of volunteer - if val == "" { // skip "erased" values + name := key[len(p):] // get name of volunteer + if val == "" { // skip "erased" values continue } urls, err := etcdtypes.NewURLs(strings.Split(val, ",")) if err != nil { - return nil, fmt.Errorf("volunteers data format error: %v", err) + return nil, errwrap.Wrapf(err, "data format error") } volunteers[name] = urls // add to map - if obj.flags.Debug { - log.Printf("Etcd: Volunteer(%v): %v", name, val) - } } return volunteers, nil } -// AdvertiseEndpoints advertises the list of available client endpoints. -func AdvertiseEndpoints(obj *EmbdEtcd, urls etcdtypes.URLs) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: AdvertiseEndpoints(%v): %v", obj.hostname, urls.String()) - defer log.Printf("Trace: Etcd: AdvertiseEndpoints(%v): Finished!", obj.hostname) +// getNominated returns a urls map of nominated etcd server volunteers. +// NOTE: I know 'nominees' might be more correct, but is less consistent here +func (obj *EmbdEtcd) getNominated(ctx context.Context) (etcdtypes.URLsMap, error) { + if obj.Debug { + obj.Logf("getNominated()") + defer obj.Logf("getNominated(): done!") } - // advertise endpoints - endpoints := fmt.Sprintf("%s/endpoints/%s", NS, obj.hostname) - ops := []etcd.Op{} // list of ops in this txn - if urls != nil { - // TODO: add a TTL? (etcd.WithLease) - ops = append(ops, etcd.OpPut(endpoints, urls.String())) // value is usually a "clientURL" - - } else { // delete message if set to erase - ops = append(ops, etcd.OpDelete(endpoints)) - } - - if _, err := obj.Txn(nil, ops, nil); err != nil { - return fmt.Errorf("endpoint advertising failed") // exit in progress? - } - return nil -} - -// Endpoints returns a urls map of available etcd server endpoints. 
-func Endpoints(obj *EmbdEtcd) (etcdtypes.URLsMap, error) { - if obj.flags.Trace { - log.Printf("Trace: Etcd: Endpoints()") - defer log.Printf("Trace: Etcd: Endpoints(): Finished!") - } - path := fmt.Sprintf("%s/endpoints/", NS) - keyMap, err := obj.Get(path, etcd.WithPrefix()) + p := obj.NS + NominatedPath + keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix()) // map[string]string, bool if err != nil { - return nil, fmt.Errorf("endpoints aren't available: %v", err) + return nil, errwrap.Wrapf(err, "can't get nominated peers") } - endpoints := make(etcdtypes.URLsMap) - for key, val := range keyMap { // loop through directory of endpoints - if !strings.HasPrefix(key, path) { + nominated := make(etcdtypes.URLsMap) + for key, val := range keyMap { // loop through directory of nominated + if !strings.HasPrefix(key, p) { continue } - name := key[len(path):] // get name of volunteer - if val == "" { // skip "erased" values + name := key[len(p):] // get name of nominee + if val == "" { // skip "erased" values continue } urls, err := etcdtypes.NewURLs(strings.Split(val, ",")) if err != nil { - return nil, fmt.Errorf("endpoints data format error: %v", err) + return nil, errwrap.Wrapf(err, "data format error") + } + nominated[name] = urls // add to map + } + return nominated, nil +} + +// getEndpoints returns a urls map of available endpoints for clients. +func (obj *EmbdEtcd) getEndpoints(ctx context.Context) (etcdtypes.URLsMap, error) { + if obj.Debug { + obj.Logf("getEndpoints()") + defer obj.Logf("getEndpoints(): done!") + } + p := obj.NS + EndpointsPath + keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix()) + if err != nil { + return nil, errwrap.Wrapf(err, "can't get client endpoints") + } + endpoints := make(etcdtypes.URLsMap) + for key, val := range keyMap { // loop through directory of endpoints + if !strings.HasPrefix(key, p) { + continue + } + name := key[len(p):] // get name of volunteer + if val == "" { // skip "erased" values + continue + } + urls, err := etcdtypes.NewURLs(strings.Split(val, ",")) + if err != nil { + return nil, errwrap.Wrapf(err, "data format error") } endpoints[name] = urls // add to map - if obj.flags.Debug { - log.Printf("Etcd: Endpoint(%v): %v", name, val) - } } return endpoints, nil } - -// SetHostnameConverged sets whether a specific hostname is converged. -func SetHostnameConverged(obj *EmbdEtcd, hostname string, isConverged bool) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: SetHostnameConverged(%s): %v", hostname, isConverged) - defer log.Printf("Trace: Etcd: SetHostnameConverged(%v): Finished!", hostname) - } - converged := fmt.Sprintf("%s/converged/%s", NS, hostname) - op := []etcd.Op{etcd.OpPut(converged, fmt.Sprintf("%t", isConverged))} - if _, err := obj.Txn(nil, op, nil); err != nil { // TODO: do we need a skipConv flag here too? - return fmt.Errorf("set converged failed") // exit in progress? - } - return nil -} - -// HostnameConverged returns a map of every hostname's converged state. -func HostnameConverged(obj *EmbdEtcd) (map[string]bool, error) { - if obj.flags.Trace { - log.Printf("Trace: Etcd: HostnameConverged()") - defer log.Printf("Trace: Etcd: HostnameConverged(): Finished!") - } - path := fmt.Sprintf("%s/converged/", NS) - keyMap, err := obj.ComplexGet(path, true, etcd.WithPrefix()) // don't un-converge - if err != nil { - return nil, fmt.Errorf("converged values aren't available: %v", err) - } - converged := make(map[string]bool) - for key, val := range keyMap { // loop through directory... 
- if !strings.HasPrefix(key, path) { - continue - } - name := key[len(path):] // get name of key - if val == "" { // skip "erased" values - continue - } - b, err := strconv.ParseBool(val) - if err != nil { - return nil, fmt.Errorf("converged data format error: %v", err) - } - converged[name] = b // add to map - } - return converged, nil -} - -// AddHostnameConvergedWatcher adds a watcher with a callback that runs on -// hostname state changes. -func AddHostnameConvergedWatcher(obj *EmbdEtcd, callbackFn func(map[string]bool) error) (func(), error) { - path := fmt.Sprintf("%s/converged/", NS) - internalCbFn := func(re *RE) error { - // TODO: get the value from the response, and apply delta... - // for now, just run a get operation which is easier to code! - m, err := HostnameConverged(obj) - if err != nil { - return err - } - return callbackFn(m) // call my function - } - return obj.AddWatcher(path, internalCbFn, true, true, etcd.WithPrefix()) // no block and no converger reset -} - -// SetClusterSize sets the ideal target cluster size of etcd peers. -func SetClusterSize(obj *EmbdEtcd, value uint16) error { - if obj.flags.Trace { - log.Printf("Trace: Etcd: SetClusterSize(): %v", value) - defer log.Printf("Trace: Etcd: SetClusterSize(): Finished!") - } - key := fmt.Sprintf("%s/idealClusterSize", NS) - - if err := obj.Set(key, strconv.FormatUint(uint64(value), 10)); err != nil { - return fmt.Errorf("function SetClusterSize failed: %v", err) // exit in progress? - } - return nil -} - -// GetClusterSize gets the ideal target cluster size of etcd peers. -func GetClusterSize(obj *EmbdEtcd) (uint16, error) { - key := fmt.Sprintf("%s/idealClusterSize", NS) - keyMap, err := obj.Get(key) - if err != nil { - return 0, fmt.Errorf("function GetClusterSize failed: %v", err) - } - - val, exists := keyMap[key] - if !exists || val == "" { - return 0, fmt.Errorf("function GetClusterSize failed: %v", err) - } - - v, err := strconv.ParseUint(val, 10, 16) - if err != nil { - return 0, fmt.Errorf("function GetClusterSize failed: %v", err) - } - return uint16(v), nil -} - -// MemberAdd adds a member to the cluster. -func MemberAdd(obj *EmbdEtcd, peerURLs etcdtypes.URLs) (*etcd.MemberAddResponse, error) { - //obj.Connect(false) // TODO: ? - ctx := context.Background() - var response *etcd.MemberAddResponse - var err error - for { - if obj.exiting { // the exit signal has been sent! - return nil, fmt.Errorf("exiting etcd") - } - obj.rLock.RLock() - response, err = obj.client.MemberAdd(ctx, peerURLs.StringSlice()) - obj.rLock.RUnlock() - if err == nil { - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return nil, err - } - } - return response, nil -} - -// MemberRemove removes a member by mID and returns if it worked, and also -// if there was an error. This is because it might have run without error, but -// the member wasn't found, for example. -func MemberRemove(obj *EmbdEtcd, mID uint64) (bool, error) { - //obj.Connect(false) // TODO: ? - ctx := context.Background() - for { - if obj.exiting { // the exit signal has been sent! - return false, fmt.Errorf("exiting etcd") - } - obj.rLock.RLock() - _, err := obj.client.MemberRemove(ctx, mID) - obj.rLock.RUnlock() - if err == nil { - break - } else if err == rpctypes.ErrMemberNotFound { - // if we get this, member already shut itself down :) - return false, nil - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return false, err - } - } - return true, nil -} - -// Members returns information on cluster membership. 
-// The member ID's are the keys, because an empty names means unstarted! -// TODO: consider queueing this through the main loop with CtxError(ctx, err) -func Members(obj *EmbdEtcd) (map[uint64]string, error) { - //obj.Connect(false) // TODO: ? - ctx := context.Background() - var response *etcd.MemberListResponse - var err error - for { - if obj.exiting { // the exit signal has been sent! - return nil, fmt.Errorf("exiting etcd") - } - obj.rLock.RLock() - if obj.flags.Trace { - log.Printf("Trace: Etcd: Members(): Endpoints are: %v", obj.client.Endpoints()) - } - response, err = obj.client.MemberList(ctx) - obj.rLock.RUnlock() - if err == nil { - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return nil, err - } - } - - members := make(map[uint64]string) - for _, x := range response.Members { - members[x.ID] = x.Name // x.Name will be "" if unstarted! - } - return members, nil -} - -// Leader returns the current leader of the etcd server cluster. -func Leader(obj *EmbdEtcd) (string, error) { - //obj.Connect(false) // TODO: ? - membersMap, err := Members(obj) - if err != nil { - return "", err - } - addresses := obj.LocalhostClientURLs() // heuristic, but probably correct - if len(addresses) == 0 { - // probably a programming error... - return "", fmt.Errorf("programming error") - } - endpoint := addresses[0].Host // FIXME: arbitrarily picked the first one - - // part two - ctx := context.Background() - var response *etcd.StatusResponse - for { - if obj.exiting { // the exit signal has been sent! - return "", fmt.Errorf("exiting etcd") - } - - obj.rLock.RLock() - response, err = obj.client.Maintenance.Status(ctx, endpoint) - obj.rLock.RUnlock() - if err == nil { - break - } - if ctx, err = obj.CtxError(ctx, err); err != nil { - return "", err - } - } - - // isLeader: response.Header.MemberId == response.Leader - for id, name := range membersMap { - if id == response.Leader { - return name, nil - } - } - return "", fmt.Errorf("members map is not current") // not found -} diff --git a/etcd/server.go b/etcd/server.go new file mode 100644 index 00000000..11371b09 --- /dev/null +++ b/etcd/server.go @@ -0,0 +1,309 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package etcd + +import ( + "fmt" + "os" + "path" + "strings" + "time" + + "github.com/purpleidea/mgmt/util" + "github.com/purpleidea/mgmt/util/errwrap" + + "github.com/coreos/etcd/embed" + etcdtypes "github.com/coreos/etcd/pkg/types" +) + +const ( + // MaxServerStartTimeout is the amount of time to wait for the server + // to start before considering it a failure. If you hit this timeout, + // let us know so that we can analyze the situation, and increase this + // if necessary. 
+	MaxServerStartTimeout = 60 * time.Second
+
+	// MaxServerCloseTimeout is the maximum amount of time we'll wait for
+	// the server to close down. If it exceeds this, it's probably a bug.
+	MaxServerCloseTimeout = 15 * time.Second
+
+	// MaxServerRetries is the maximum number of times we can try to restart
+	// the server if it fails on startup. This can help work around some
+	// timing bugs in etcd.
+	MaxServerRetries = 5
+
+	// ServerRetryWait is the amount of time to wait between retries.
+	ServerRetryWait = 500 * time.Millisecond
+)
+
+// serverAction represents the desired server state.
+type serverAction uint8
+
+const (
+	serverActionStop serverAction = iota
+	serverActionStart
+)
+
+// serverAction returns whether we should perform the requested action. For
+// example, if we call it with serverActionStart and it returns true, it means
+// we asked if we should start the server, and the answer is yes.
+func (obj *EmbdEtcd) serverAction(action serverAction) bool {
+	// check if i have actually volunteered first of all...
+	if obj.NoServer || len(obj.ServerURLs) == 0 {
+		obj.Logf("inappropriately nominated, rogue or stale server?")
+		return false // no action
+	}
+
+	_, exists := obj.nominated[obj.Hostname] // am i nominated?
+
+	// if there are no other peers, we create a new server
+	// TODO: do we need an || len(obj.nominated) == 0 if we're the first?
+	newCluster := len(obj.nominated) == 1 && exists
+
+	switch action {
+	case serverActionStart:
+		// we start if...
+		return obj.server == nil && (exists || newCluster)
+
+	case serverActionStop:
+		// we stop if...
+		return obj.server != nil && !exists
+	}
+
+	return false // no action needed
+}
+
+// runServer kicks off a new embedded etcd server. It exits when the server
+// shuts down. The exit can be triggered at any time by running destroyServer,
+// or if it exits due to some condition like an error.
+// FIXME: should peerURLsMap just use obj.nominated instead?
+func (obj *EmbdEtcd) runServer(newCluster bool, peerURLsMap etcdtypes.URLsMap) (reterr error) {
+	obj.Logf("server: runServer: (newCluster=%t): %+v", newCluster, peerURLsMap)
+	defer obj.Logf("server: runServer: done!")
+	//obj.serverwg.Wait() // bonus, but instead, a mutex would be race free!
+	obj.serverwg.Add(1)
+	defer obj.serverwg.Done()
+	defer obj.serverExitsSignal.Send()
+	dataDir := fmt.Sprintf("%s/", path.Join(obj.Prefix, "server"))
+	if err := os.MkdirAll(dataDir, 0770); err != nil {
+		return errwrap.Wrapf(err, "couldn't mkdir: %s", dataDir)
+	}
+
+	memberName := obj.Hostname
+
+	// if no peer URLs exist, then starting a server is mostly only for some
+	// testing, but etcd doesn't allow the value to be empty so we use this!
+	peerURLs, err := etcdtypes.NewURLs([]string{"http://localhost:0"})
+	if err != nil {
+		return errwrap.Wrapf(err, "invalid URLs")
+	}
+	if len(obj.ServerURLs) > 0 {
+		peerURLs = obj.ServerURLs
+	}
+	initialPeerURLsMap, err := copyURLsMap(peerURLsMap)
+	if err != nil {
+		return errwrap.Wrapf(err, "error copying URLsMap")
+	}
+	// add self to list if it's not already in there...
+	if _, exists := peerURLsMap[memberName]; !exists {
+		initialPeerURLsMap[memberName] = peerURLs
+	}
+
+	// TODO: do we need to copy?
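+	// The advertised (A*) URLs default to the listen URLs below, and are
+	// only overridden when explicit advertise URLs were provided.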
+ aPUrls := peerURLs + if len(obj.AServerURLs) > 0 { + aPUrls = obj.AServerURLs + } + // NOTE: this logic is similar to obj.curls() + aCUrls := obj.ClientURLs + if len(obj.AClientURLs) > 0 { + aCUrls = obj.AClientURLs + } + + // embed etcd + cfg := embed.NewConfig() + cfg.Name = memberName // hostname + cfg.Dir = dataDir + cfg.LPUrls = peerURLs + cfg.LCUrls = obj.ClientURLs + cfg.APUrls = aPUrls + cfg.ACUrls = aCUrls + cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305 + cfg.MaxTxnOps = DefaultMaxTxnOps + + cfg.InitialCluster = initialPeerURLsMap.String() // including myself! + if newCluster { + cfg.ClusterState = embed.ClusterStateFlagNew + } else { + cfg.ClusterState = embed.ClusterStateFlagExisting + } + //cfg.ForceNewCluster = newCluster // TODO: ? + + if err := cfg.Validate(); err != nil { + return errwrap.Wrapf(err, "server config is invalid") + } + + obj.Logf("server: starting...") + // TODO: etcd panics with: `create wal error: no space left on device` + // see: https://github.com/etcd-io/etcd/issues/10588 + defer func() { + if r := recover(); r != nil { // magic panic catcher + obj.Logf("server: panic: %s", r) + reterr = fmt.Errorf("panic during start with: %s", r) // set named return err + } + }() + // XXX: workaround: https://github.com/etcd-io/etcd/issues/10626 + // This runs when we see the nominate operation. This could also error + // if this races to start up, and happens before the member add runs. + count := 0 + for { + obj.server, err = embed.StartEtcd(cfg) + if err == nil { + break + } + e := err.Error() + // catch: error validating peerURLs ... member count is unequal + if strings.HasPrefix(e, "error validating peerURLs") && strings.HasSuffix(e, "member count is unequal") { + count++ + if count > MaxServerRetries { + err = errwrap.Wrapf(err, "workaround retries (%d) exceeded", MaxServerRetries) + break + } + obj.Logf("waiting %s for retry", ServerRetryWait.String()) + time.Sleep(ServerRetryWait) + continue + } + break + } + defer func() { + obj.server = nil // important because this is used as an isRunning flag + }() + if err != nil { + // early debug logs in case something downstream blocks + if obj.Debug { + obj.Logf("server failing with: %+v", err) + } + return errwrap.Wrapf(err, "server start failed") + } + + closedChan := make(chan struct{}) + defer func() { + select { + case <-time.After(MaxServerCloseTimeout): + obj.Logf("server: close timeout of %s reached", MaxServerCloseTimeout.String()) + case <-closedChan: + } + }() + defer func() { + // no wg here, since we want to let it die on exit if need be... + // XXX: workaround: https://github.com/etcd-io/etcd/issues/10600 + go func() { + obj.server.Close() // this blocks until server has stopped + close(closedChan) // woo! + }() + }() + defer obj.server.Server.Stop() // trigger a shutdown + + select { + case <-obj.server.Server.ReadyNotify(): // we hang here if things are bad + obj.Logf("server: ready") // it didn't hang! + + // TODO: should we wait for this notification elsewhere? + case <-obj.server.Server.StopNotify(): // it's going down now... 
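+		// The server began shutting down before it ever became ready,
+		// so treat this as a failed startup instead of blocking forever.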
+ err := fmt.Errorf("received stop notification") + obj.Logf("server: stopped: %v", err) + return err + + case <-time.After(MaxServerStartTimeout): + err := fmt.Errorf("start timeout of %s reached", MaxServerStartTimeout.String()) + obj.Logf("server: %v", err) + return err + } + + obj.serverID = uint64(obj.server.Server.ID()) // store member id for internal use + defer func() { + obj.serverID = 0 // reset + }() + obj.addSelfState() // add to endpoints list so self client can connect! + obj.setEndpoints() // sync client with new endpoints + defer obj.setEndpoints() + defer obj.rmMemberState(obj.Hostname) + + obj.serverReadySignal.Send() // send a signal, and then reset the signal + + for { + select { + case err, ok := <-obj.server.Err(): + if !ok { // server shut down + return errwrap.Wrapf(err, "server shutdown error") + } + + case <-obj.serverExit.Signal(): + return errwrap.Wrapf(obj.serverExit.Error(), "server signal exit") + } + } + + //return nil // unreachable +} + +// destroyServer shuts down the embedded etcd server portion. +func (obj *EmbdEtcd) destroyServer() error { + // This function must be thread-safe because a destroy request will + // cause runServer to return, which then runs the defer of this function + // which is meant to clean up when an independent, normal runServer + // return happens. Add the mutex to protect against races on this call. + obj.servermu.Lock() + defer obj.servermu.Unlock() + if obj.server == nil { + return nil // don't error on redundant calls + } + obj.Logf("server: destroyServer...") + defer obj.Logf("server: destroyServer: done!") + + obj.serverExit.Done(nil) // trigger an exit + + obj.serverwg.Wait() // wait for server to finish shutting down + defer func() { + obj.serverExit = util.NewEasyExit() // reset + }() + return obj.serverExit.Error() +} + +// ServerReady returns a channel that closes when we're up and running. This +// process happens when calling runServer. If runServer is never called, this +// will never happen. It also returns a cancel/ack function which must be called +// once the signal is received or we are done watching it. This is because this +// is a cyclical signal which happens, and then gets reset as the server starts +// up, shuts down, and repeats the cycle. The cancel/ack function ensures that +// we only watch a signal when it's ready to be read, and only reset it when we +// are done watching it. +func (obj *EmbdEtcd) ServerReady() (<-chan struct{}, func()) { + return obj.serverReadySignal.Subscribe() +} + +// ServerExited returns a channel that closes when the server is destroyed. This +// process happens after runServer exits. If runServer is never called, this +// will never happen. It also returns a cancel/ack function which must be called +// once the signal is received or we are done watching it. This is because this +// is a cyclical signal which happens, and then gets reset as the server starts +// up, shuts down, and repeats the cycle. The cancel/ack function ensures that +// we only watch a signal when it's ready to be read, and only reset it when we +// are done watching it. 
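+//
+// A hypothetical consumer looks something like:
+//
+//	ch, ack := etcdObj.ServerExited()
+//	select {
+//	case <-ch: // the server was destroyed
+//	}
+//	ack() // acknowledge, so the signal can reset for the next cycle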
+func (obj *EmbdEtcd) ServerExited() (<-chan struct{}, func()) {
+	return obj.serverExitsSignal.Subscribe()
+}
diff --git a/etcd/tasks.go b/etcd/tasks.go
new file mode 100644
index 00000000..15399022
--- /dev/null
+++ b/etcd/tasks.go
@@ -0,0 +1,163 @@
+// Mgmt
+// Copyright (C) 2013-2019+ James Shubin and the project contributors
+// Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package etcd
+
+import (
+	"fmt"
+
+	"github.com/purpleidea/mgmt/util/errwrap"
+)
+
+// task represents a single task to run. These are useful for pending work that
+// we want to schedule, but that shouldn't permanently error out the system on
+// failure. In particular, idempotent tasks that are safe are ideal for this
+// queue. The tasks can be added with queueTask.
+type task struct {
+	name   string       // name of task
+	fn     func() error // task to run
+	retry  int          // number of times to retry on error, -1 for infinite
+	block  bool         // should we block the queue until this succeeds?
+	report bool         // should we report the error on permanent failure?
+}
+
+// String returns a string representation of the struct.
+func (obj *task) String() string {
+	return fmt.Sprintf("task(%s)", obj.name)
+}
+
+// queueTask adds a task to the task worker queue. If you want to specify any
+// properties that differ from the defaults, use queueRawTask instead.
+func (obj *EmbdEtcd) queueTask(fn func() error) error {
+	t := &task{
+		fn: fn,
+	}
+	return obj.queueRawTask(t) // this takes the queue lock itself
+}
+
+// queueRawTask adds a task of any format to the queue. You should not name your
+// task a string which could match a positive integer. Those names are used when
+// an unnamed task is specified and the system needs to generate a name.
+func (obj *EmbdEtcd) queueRawTask(t *task) error {
+	if obj.Debug {
+		obj.Logf("queueRawTask()")
+		defer obj.Logf("queueRawTask(): done!")
+	}
+
+	if t == nil {
+		return fmt.Errorf("nil task")
+	}
+
+	obj.taskQueueLock.Lock()
+	defer obj.taskQueueLock.Unlock()
+	if obj.taskQueue == nil { // killed signal
+		return fmt.Errorf("task queue killed")
+	}
+	if t.name == "" {
+		obj.taskQueueID++ // increment
+		t.name = fmt.Sprintf("%d", obj.taskQueueID)
+	}
+
+	obj.taskQueue = append(obj.taskQueue, t)
+	if !obj.taskQueueRunning {
+		obj.taskQueueRunning = true
+		obj.taskQueueWg.Add(1)
+		go obj.runTaskQueue()
+	}
+	return nil
+}
+
+// killTaskQueue empties the task queue, causing it to shut down.
+func (obj *EmbdEtcd) killTaskQueue() int {
+	obj.taskQueueLock.Lock()
+	count := len(obj.taskQueue)
+	obj.taskQueue = nil // clear queue
+	obj.taskQueueLock.Unlock()
+
+	obj.taskQueueWg.Wait()    // wait for queue to exit
+	obj.taskQueue = []*task{} // reset
+	return count              // number of tasks deleted
+}
+
+// runTaskQueue processes the task queue. This is started automatically by
+// queueRawTask if needed. It will shut itself down when the queue is empty.
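+//
+// Failed non-blocking tasks are re-appended to the back of the queue (subject
+// to their retry count) so later tasks aren't starved; failed blocking tasks
+// are retried in place and hold up the queue until they succeed or run out of
+// retries.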
+func (obj *EmbdEtcd) runTaskQueue() {
+	defer obj.taskQueueWg.Done() // added in queueRawTask
+	for {
+		obj.taskQueueLock.Lock()
+		if obj.taskQueue == nil || len(obj.taskQueue) == 0 {
+			defer obj.taskQueueLock.Unlock()
+			obj.taskQueueRunning = false
+			return
+		}
+		var t *task
+		t, obj.taskQueue = obj.taskQueue[0], obj.taskQueue[1:]
+		obj.taskQueueLock.Unlock()
+
+		if !t.block {
+			if obj.Debug {
+				obj.Logf("%s: run...", t)
+			}
+			err := t.fn()
+			if obj.Debug {
+				obj.Logf("%s: done: %v", t, err)
+			}
+			if err != nil {
+				if t.retry == 0 {
+					if t.report {
+						// send a permanent error
+						// XXX: guard errChan for early close... hmmm
+						select {
+						case obj.errChan <- errwrap.Wrapf(err, "task error"):
+						}
+					}
+					continue
+				}
+				if t.retry > 0 { // don't decrement from -1
+					t.retry--
+				}
+				obj.taskQueueLock.Lock()
+				if obj.taskQueue != nil { // killed signal
+					obj.taskQueue = append(obj.taskQueue, t)
+				}
+				obj.taskQueueLock.Unlock()
+			}
+			continue
+		}
+
+		// block
+		for {
+			if obj.Debug {
+				obj.Logf("%s: run...", t)
+			}
+			err := t.fn()
+			if obj.Debug {
+				obj.Logf("%s: done: %v", t, err)
+			}
+			if err == nil {
+				break // success, unblock the queue!
+			}
+			if t.retry == 0 {
+				break // give up
+			}
+			if t.retry > 0 { // don't decrement from -1
+				t.retry--
+			}
+		}
+	}
+}
diff --git a/etcd/util.go b/etcd/util.go
new file mode 100644
index 00000000..63a0d643
--- /dev/null
+++ b/etcd/util.go
@@ -0,0 +1,173 @@
+// Mgmt
+// Copyright (C) 2013-2019+ James Shubin and the project contributors
+// Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package etcd
+
+// TODO: move to sub-package if this expands in utility or is used elsewhere...
+
+import (
+	"fmt"
+	"net/url"
+	"strings"
+
+	"github.com/purpleidea/mgmt/util/errwrap"
+
+	etcdtypes "github.com/coreos/etcd/pkg/types"
+)
+
+// copyURL copies a URL.
+// TODO: submit this upstream to etcd ?
+func copyURL(u *url.URL) (*url.URL, error) {
+	if u == nil {
+		return nil, fmt.Errorf("empty URL specified")
+	}
+	return url.Parse(u.String()) // copy it
+}
+
+// copyURLs copies a URLs.
+// TODO: submit this upstream to etcd ?
+func copyURLs(urls etcdtypes.URLs) (etcdtypes.URLs, error) {
+	out := []url.URL{}
+	for _, x := range urls {
+		u, err := copyURL(&x)
+		if err != nil {
+			return nil, err
+		}
+		out = append(out, *u)
+	}
+	return out, nil
+}
+
+// copyURLsMap copies a URLsMap.
+// TODO: submit this upstream to etcd ?
+func copyURLsMap(urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
+	out := make(etcdtypes.URLsMap)
+	for k, v := range urlsMap {
+		urls, err := copyURLs(v)
+		if err != nil {
+			return nil, err
+		}
+		out[k] = urls
+	}
+	return out, nil
+}
+
+// cmpURLs compares two URLs, and returns nil if they are the same.
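+//
+// For example (illustrative values):
+//
+//	u1, _ := etcdtypes.NewURLs([]string{"http://127.0.0.1:2379"})
+//	u2, _ := etcdtypes.NewURLs([]string{"http://127.0.0.1:2379"})
+//	if err := cmpURLs(u1, u2); err != nil {
+//		// they differ...
+//	}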
+func cmpURLs(u1, u2 etcdtypes.URLs) error { + if (u1 == nil) != (u2 == nil) { // xor + return fmt.Errorf("lists differ") + } + if len(u1) != len(u2) { + return fmt.Errorf("length of lists is not the same") + } + + for i, v1 := range u1 { + if v1 != u2[i] { + return fmt.Errorf("index %d differs", i) + } + } + + return nil +} + +// cmpURLsMap compares two URLsMap's, and returns nil if they are the same. +func cmpURLsMap(m1, m2 etcdtypes.URLsMap) error { + if (m1 == nil) != (m2 == nil) { // xor + return fmt.Errorf("maps differ") + } + if len(m1) != len(m2) { + return fmt.Errorf("length of maps is not the same") + } + + for k, v1 := range m1 { + v2, exists := m2[k] + if !exists { + return fmt.Errorf("key `%s` not found in map 2", k) + } + if err := cmpURLs(v1, v2); err != nil { + return errwrap.Wrapf(err, "values at key `%s` differ", k) + } + } + + return nil +} + +// newURLsMap is a helper to build a new URLsMap without having to import the +// messy etcdtypes package. +func newURLsMap() etcdtypes.URLsMap { + return make(etcdtypes.URLsMap) +} + +func fromURLsToStringList(urls etcdtypes.URLs) []string { + result := []string{} + for _, u := range urls { // flatten map + result = append(result, u.String()) // use full url including scheme + } + return result +} + +// fromURLsMapToStringList flattens a map of URLs into a single string list. +// Remember to sort the result if you want it to be deterministic! +func fromURLsMapToStringList(m etcdtypes.URLsMap) []string { + result := []string{} + for _, x := range m { // flatten map + for _, u := range x { + result = append(result, u.String()) // use full url including scheme + } + } + return result +} + +// validateURLsMap checks if each embedded URL is parseable correctly. +//func validateURLsMap(urlsMap etcdtypes.URLsMap) error { +// _, err := copyURLsMap(urlsMap) // would fail if anything didn't parse +// return err +//} + +// localhostURLs returns the most localhost like URLs for direct connection. +// This gets clients to talk to the local servers first before looking remotely. +// TODO: improve this algorithm as it's currently a bad heuristic +func localhostURLs(urls etcdtypes.URLs) etcdtypes.URLs { + out := etcdtypes.URLs{} + for _, u := range urls { + // "localhost" or anything in 127.0.0.0/8 is valid! + if strings.HasPrefix(u.Host, "localhost") || strings.HasPrefix(u.Host, "127.") { + out = append(out, u) + continue + } + // or ipv6 localhost + // TODO: are there others to add here? + if strings.HasPrefix(u.Host, "[::1]") { + out = append(out, u) + continue + } + // or local unix domain sockets + if u.Scheme == "unix" { + out = append(out, u) + continue + } + } + return out +} + +//func urlRemoveScheme(urls etcdtypes.URLs) []string { +// strs := []string{} +// for _, u := range urls { +// strs = append(strs, u.Host) // remove http:// prefix +// } +// return strs +//} diff --git a/etcd/util_test.go b/etcd/util_test.go new file mode 100644 index 00000000..5621e570 --- /dev/null +++ b/etcd/util_test.go @@ -0,0 +1,189 @@ +// Mgmt +// Copyright (C) 2013-2019+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +// +build !root + +package etcd + +import ( + "net/url" + "testing" +) + +func TestCopyURL0(t *testing.T) { + // list of urls to test + strs := []string{ + "", + "http://192.168.13.42:2379", + "https://192.168.13.42:2380", + "http://192.168.13.42", + "https://192.168.13.42", + } + for _, str := range strs { + t.Logf("testing: `%s`", str) + u1, err := url.Parse(str) + if err != nil { + t.Errorf("url did not parse: %+v", err) + continue + } + + u2, err := copyURL(u1) + if err != nil { + t.Errorf("url did not copy: %+v", err) + continue + } + + if s := u2.String(); s != str { + t.Errorf("url did not cmp, got: `%s`, expected: `%s`", s, str) + } + + // bonus test (add to separate lists of size one) + if err := cmpURLs([]url.URL{*u1}, []url.URL{*u2}); err != nil { + t.Errorf("urls did not cmp, err: %+v", err) + } + } +} + +func TestCopyURLs0(t *testing.T) { + // list of urls lists to test + nstrs := [][]string{ + {}, // empty! + { + "http://192.168.13.42:2379", + "https://192.168.13.42:2380", + "http://192.168.13.42", + "https://192.168.13.42", + }, + { + "http://192.168.42.42:2379", + "https://192.168.13.42:2380", + "http://192.168.99.42", + "https://10.10.1.255", + }, + { + "http://example.com:2379", + "https://purpleidea.com/:2379", + "http://192.168.13.42", + "https://192.168.13.42", + }, + } + for _, strs := range nstrs { + t.Logf("testing: `%s`", strs) + + urls1 := []url.URL{} + for _, str := range strs { + u, err := url.Parse(str) + if err != nil { + t.Errorf("url did not parse: %+v", err) + continue + } + urls1 = append(urls1, *u) + } + + urls2, err := copyURLs(urls1) + if err != nil { + t.Errorf("urls did not copy: %+v", err) + continue + } + + if err := cmpURLs(urls1, urls2); err != nil { + t.Errorf("urls did not cmp, err: %+v", err) + } + } +} + +func TestCopyURLsMap0(t *testing.T) { + // list of urls lists to test + nmstrs := []map[string][]string{ + {}, // empty! + { + "h1": []string{}, // empty + "h2": []string{}, // empty + "h3": []string{}, // empty + }, + { + "h1": []string{}, // empty + "h2": nil, // nil ! + "h3": []string{}, // empty + }, + { + "h1": []string{}, // empty + "h2": []string{ + "http://example.com:2379", + "https://purpleidea.com/:2379", + "http://192.168.13.42", + "https://192.168.13.42", + }, + }, + { + "h1": []string{ + "http://192.168.13.42:2379", + "https://192.168.13.42:2380", + "http://192.168.13.42", + "https://192.168.13.42", + }, + "h2": []string{ + "http://example.com:2379", + "https://purpleidea.com/:2379", + "http://192.168.13.42", + "https://192.168.13.42", + }, + }, + { + "h1": []string{ + "http://192.168.13.42:2379", + "https://192.168.13.42:2380", + "http://192.168.13.42", + "https://192.168.13.42", + }, + "h2": nil, // nil ! 
+ "h3": []string{ + "http://example.com:2379", + "https://purpleidea.com/:2379", + "http://192.168.13.42", + "https://192.168.13.42", + }, + }, + } + + for _, mstrs := range nmstrs { + t.Logf("testing: `%s`", mstrs) + urlsMap1 := newURLsMap() + for key, strs := range mstrs { + urls := []url.URL{} + for _, str := range strs { + u, err := url.Parse(str) + if err != nil { + t.Errorf("url did not parse: %+v", err) + continue + } + urls = append(urls, *u) + } + urlsMap1[key] = urls + } + + urlsMap2, err := copyURLsMap(urlsMap1) + if err != nil { + t.Errorf("urlsMap did not copy: %+v", err) + continue + } + + if err := cmpURLsMap(urlsMap1, urlsMap2); err != nil { + t.Errorf("urlsMap did not cmp, err: %+v", err) + } + } +} diff --git a/etcd/world.go b/etcd/world.go index 0240bcab..3ec9099b 100644 --- a/etcd/world.go +++ b/etcd/world.go @@ -18,19 +18,27 @@ package etcd import ( + "context" "fmt" "net/url" "strings" "github.com/purpleidea/mgmt/engine" + "github.com/purpleidea/mgmt/etcd/chooser" + "github.com/purpleidea/mgmt/etcd/client" + "github.com/purpleidea/mgmt/etcd/client/resources" + "github.com/purpleidea/mgmt/etcd/client/str" + "github.com/purpleidea/mgmt/etcd/client/strmap" etcdfs "github.com/purpleidea/mgmt/etcd/fs" + "github.com/purpleidea/mgmt/etcd/interfaces" "github.com/purpleidea/mgmt/etcd/scheduler" + "github.com/purpleidea/mgmt/util" ) // World is an etcd backed implementation of the World interface. type World struct { Hostname string // uuid for the consumer of these - EmbdEtcd *EmbdEtcd + Client interfaces.Client MetadataPrefix string // expected metadata prefix StoragePrefix string // storage prefix for etcdfs storage StandaloneFs engine.Fs // store an fs here for local usage @@ -40,72 +48,113 @@ type World struct { // ResWatch returns a channel which spits out events on possible exported // resource changes. -func (obj *World) ResWatch() chan error { - return WatchResources(obj.EmbdEtcd) +func (obj *World) ResWatch(ctx context.Context) (chan error, error) { + return resources.WatchResources(ctx, obj.Client) } // ResExport exports a list of resources under our hostname namespace. // Subsequent calls replace the previously set collection atomically. -func (obj *World) ResExport(resourceList []engine.Res) error { - return SetResources(obj.EmbdEtcd, obj.Hostname, resourceList) +func (obj *World) ResExport(ctx context.Context, resourceList []engine.Res) error { + return resources.SetResources(ctx, obj.Client, obj.Hostname, resourceList) } // ResCollect gets the collection of exported resources which match the filter. // It does this atomically so that a call always returns a complete collection. -func (obj *World) ResCollect(hostnameFilter, kindFilter []string) ([]engine.Res, error) { +func (obj *World) ResCollect(ctx context.Context, hostnameFilter, kindFilter []string) ([]engine.Res, error) { // XXX: should we be restricted to retrieving resources that were // exported with a tag that allows or restricts our hostname? We could // enforce that here if the underlying API supported it... Add this? - return GetResources(obj.EmbdEtcd, hostnameFilter, kindFilter) + return resources.GetResources(ctx, obj.Client, hostnameFilter, kindFilter) +} + +// IdealClusterSizeWatch returns a stream of errors anytime the cluster-wide +// dynamic cluster size setpoint changes. 
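+//
+// A hypothetical caller, which owns ctx and an attached wait group wg:
+//
+//	ch, err := world.IdealClusterSizeWatch(util.CtxWithWg(ctx, wg))
+//	if err != nil {
+//		return err
+//	}
+//	for range ch { // one event per setpoint change
+//		// re-read the setpoint with IdealClusterSizeGet...
+//	}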
+func (obj *World) IdealClusterSizeWatch(ctx context.Context) (chan error, error) {
+	c := client.NewClientFromSimple(obj.Client, ChooserPath)
+	if err := c.Init(); err != nil {
+		return nil, err
+	}
+	util.WgFromCtx(ctx).Add(1)
+	go func() {
+		defer util.WgFromCtx(ctx).Done()
+		// The deferred Close is registered *after* the Done above so that
+		// it runs first: Close will not finish until the Watcher returns,
+		// because it contains a wg.Wait() in it...
+		defer c.Close() // ignore error
+		select {
+		case <-ctx.Done():
+		}
+	}()
+	return c.Watcher(ctx, chooser.IdealDynamicSizePath)
+}
+
+// IdealClusterSizeGet gets the cluster-wide dynamic cluster size setpoint.
+func (obj *World) IdealClusterSizeGet(ctx context.Context) (uint16, error) {
+	c := client.NewClientFromSimple(obj.Client, ChooserPath)
+	if err := c.Init(); err != nil {
+		return 0, err
+	}
+	defer c.Close() // ignore error
+	return chooser.DynamicSizeGet(ctx, c) // use client with added namespace
+}
+
+// IdealClusterSizeSet sets the cluster-wide dynamic cluster size setpoint.
+func (obj *World) IdealClusterSizeSet(ctx context.Context, size uint16) (bool, error) {
+	c := client.NewClientFromSimple(obj.Client, ChooserPath)
+	if err := c.Init(); err != nil {
+		return false, err
+	}
+	defer c.Close() // ignore error
+	return chooser.DynamicSizeSet(ctx, c, size)
+}
+
 // StrWatch returns a channel which spits out events on possible string changes.
-func (obj *World) StrWatch(namespace string) chan error {
-	return WatchStr(obj.EmbdEtcd, namespace)
+func (obj *World) StrWatch(ctx context.Context, namespace string) (chan error, error) {
+	return str.WatchStr(ctx, obj.Client, namespace)
 }
 
 // StrIsNotExist returns whether the error from StrGet is a key missing error.
 func (obj *World) StrIsNotExist(err error) bool {
-	return err == ErrNotExist
+	return err == interfaces.ErrNotExist
 }
 
 // StrGet returns the value for the given namespace.
-func (obj *World) StrGet(namespace string) (string, error) {
-	return GetStr(obj.EmbdEtcd, namespace)
+func (obj *World) StrGet(ctx context.Context, namespace string) (string, error) {
+	return str.GetStr(ctx, obj.Client, namespace)
 }
 
 // StrSet sets the namespace value to a particular string.
-func (obj *World) StrSet(namespace, value string) error {
-	return SetStr(obj.EmbdEtcd, namespace, &value)
+func (obj *World) StrSet(ctx context.Context, namespace, value string) error {
+	return str.SetStr(ctx, obj.Client, namespace, &value)
 }
 
 // StrDel deletes the value in a particular namespace.
-func (obj *World) StrDel(namespace string) error {
-	return SetStr(obj.EmbdEtcd, namespace, nil)
+func (obj *World) StrDel(ctx context.Context, namespace string) error {
+	return str.SetStr(ctx, obj.Client, namespace, nil)
 }
 
 // StrMapWatch returns a channel which spits out events on possible string changes.
-func (obj *World) StrMapWatch(namespace string) chan error {
-	return WatchStrMap(obj.EmbdEtcd, namespace)
+func (obj *World) StrMapWatch(ctx context.Context, namespace string) (chan error, error) {
+	return strmap.WatchStrMap(ctx, obj.Client, namespace)
 }
 
 // StrMapGet returns a map of hostnames to values in the given namespace.
-func (obj *World) StrMapGet(namespace string) (map[string]string, error) {
-	return GetStrMap(obj.EmbdEtcd, []string{}, namespace)
+func (obj *World) StrMapGet(ctx context.Context, namespace string) (map[string]string, error) {
+	return strmap.GetStrMap(ctx, obj.Client, []string{}, namespace)
 }
 
 // StrMapSet sets the namespace value to a particular string under the identity
 // of its own hostname.
-func (obj *World) StrMapSet(namespace, value string) error { - return SetStrMap(obj.EmbdEtcd, obj.Hostname, namespace, &value) +func (obj *World) StrMapSet(ctx context.Context, namespace, value string) error { + return strmap.SetStrMap(ctx, obj.Client, obj.Hostname, namespace, &value) } // StrMapDel deletes the value in a particular namespace. -func (obj *World) StrMapDel(namespace string) error { - return SetStrMap(obj.EmbdEtcd, obj.Hostname, namespace, nil) +func (obj *World) StrMapDel(ctx context.Context, namespace string) error { + return strmap.SetStrMap(ctx, obj.Client, obj.Hostname, namespace, nil) } // Scheduler returns a scheduling result of hosts in a particular namespace. +// XXX: Add a context.Context here func (obj *World) Scheduler(namespace string, opts ...scheduler.Option) (*scheduler.Result, error) { modifiedOpts := []scheduler.Option{} for _, o := range opts { @@ -115,7 +164,8 @@ func (obj *World) Scheduler(namespace string, opts ...scheduler.Option) (*schedu modifiedOpts = append(modifiedOpts, scheduler.Debug(obj.Debug)) modifiedOpts = append(modifiedOpts, scheduler.Logf(obj.Logf)) - return scheduler.Schedule(obj.EmbdEtcd.GetClient(), fmt.Sprintf("%s/scheduler/%s", NS, namespace), obj.Hostname, modifiedOpts...) + path := fmt.Sprintf(schedulerPathFmt, namespace) + return scheduler.Schedule(obj.Client.GetClient(), path, obj.Hostname, modifiedOpts...) } // Fs returns a distributed file system from a unique URI. For single host @@ -144,9 +194,14 @@ func (obj *World) Fs(uri string) (engine.Fs, error) { } etcdFs := &etcdfs.Fs{ - Client: obj.EmbdEtcd.GetClient(), + Client: obj.Client, // TODO: do we need to add a namespace? Metadata: u.Path, DataPrefix: obj.StoragePrefix, + + Debug: obj.Debug, + Logf: func(format string, v ...interface{}) { + obj.Logf("fs: "+format, v...) 
+ }, } return etcdFs, nil } diff --git a/examples/lang/etcd-config0.mcl b/examples/lang/etcd-config0.mcl new file mode 100644 index 00000000..5f4cfde4 --- /dev/null +++ b/examples/lang/etcd-config0.mcl @@ -0,0 +1,4 @@ +# sets a cluster parameter, safe to be called identically from multiple machines +config:etcd "whatever" { + idealclustersize => 7, +} diff --git a/examples/lang/exchange0.mcl b/examples/lang/exchange0.mcl index a6d1f3bf..9e855ce6 100644 --- a/examples/lang/exchange0.mcl +++ b/examples/lang/exchange0.mcl @@ -1,9 +1,10 @@ # run this example with these commands # watch -n 0.1 'tail *' # run this in /tmp/mgmt/ -# time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl +# time ./mgmt run --hostname h1 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty +# time ./mgmt deploy --no-git --seeds http://127.0.0.1:2379 lang --lang examples/lang/exchange0.mcl import "sys" import "world" diff --git a/lang/funcs/core/world/exchange_func.go b/lang/funcs/core/world/exchange_func.go index bb1af6ad..ce94c66d 100644 --- a/lang/funcs/core/world/exchange_func.go +++ b/lang/funcs/core/world/exchange_func.go @@ -18,6 +18,7 @@ package coreworld import ( + "context" "fmt" "github.com/purpleidea/mgmt/lang/funcs" @@ -75,6 +76,8 @@ func (obj *ExchangeFunc) Init(init *interfaces.Init) error { // Stream returns the changing values that this func has over time. func (obj *ExchangeFunc) Stream() error { defer close(obj.init.Output) // the sender closes + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() for { select { // TODO: should this first chan be run as a priority channel to @@ -105,8 +108,13 @@ func (obj *ExchangeFunc) Stream() error { // TODO: support changing the namespace over time... // TODO: possibly removing our stored value there first! 
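+			// the namespace is latched by the first value we receive;
+			// any later attempt to change it is rejected below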
if obj.namespace == "" { - obj.namespace = namespace // store it - obj.watchChan = obj.init.World.StrMapWatch(obj.namespace) // watch for var changes + obj.namespace = namespace // store it + var err error + obj.watchChan, err = obj.init.World.StrMapWatch(ctx, obj.namespace) // watch for var changes + if err != nil { + return err + } + } else if obj.namespace != namespace { return fmt.Errorf("can't change namespace, previously: `%s`", obj.namespace) } @@ -116,7 +124,7 @@ func (obj *ExchangeFunc) Stream() error { obj.init.Logf("value: %+v", value) } - if err := obj.init.World.StrMapSet(obj.namespace, value); err != nil { + if err := obj.init.World.StrMapSet(ctx, obj.namespace, value); err != nil { return errwrap.Wrapf(err, "namespace write error of `%s` to `%s`", value, obj.namespace) } @@ -134,7 +142,7 @@ func (obj *ExchangeFunc) Stream() error { return errwrap.Wrapf(err, "channel watch failed on `%s`", obj.namespace) } - keyMap, err := obj.init.World.StrMapGet(obj.namespace) + keyMap, err := obj.init.World.StrMapGet(ctx, obj.namespace) if err != nil { return errwrap.Wrapf(err, "channel read failed on `%s`", obj.namespace) } diff --git a/lang/funcs/core/world/kvlookup_func.go b/lang/funcs/core/world/kvlookup_func.go index ec0e8a11..03cc4771 100644 --- a/lang/funcs/core/world/kvlookup_func.go +++ b/lang/funcs/core/world/kvlookup_func.go @@ -18,6 +18,7 @@ package coreworld import ( + "context" "fmt" "github.com/purpleidea/mgmt/lang/funcs" @@ -73,6 +74,8 @@ func (obj *KVLookupFunc) Init(init *interfaces.Init) error { // Stream returns the changing values that this func has over time. func (obj *KVLookupFunc) Stream() error { defer close(obj.init.Output) // the sender closes + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() for { select { // TODO: should this first chan be run as a priority channel to @@ -103,10 +106,14 @@ func (obj *KVLookupFunc) Stream() error { // TODO: support changing the namespace over time... // TODO: possibly removing our stored value there first! if obj.namespace == "" { - obj.namespace = namespace // store it - obj.watchChan = obj.init.World.StrMapWatch(obj.namespace) // watch for var changes + obj.namespace = namespace // store it + var err error + obj.watchChan, err = obj.init.World.StrMapWatch(ctx, obj.namespace) // watch for var changes + if err != nil { + return err + } - result, err := obj.buildMap() // build the map... + result, err := obj.buildMap(ctx) // build the map... if err != nil { return err } @@ -135,7 +142,7 @@ func (obj *KVLookupFunc) Stream() error { return errwrap.Wrapf(err, "channel watch failed on `%s`", obj.namespace) } - result, err := obj.buildMap() // build the map... + result, err := obj.buildMap(ctx) // build the map... if err != nil { return err } @@ -166,8 +173,8 @@ func (obj *KVLookupFunc) Close() error { } // buildMap builds the result map which we'll need. It uses struct variables. -func (obj *KVLookupFunc) buildMap() (types.Value, error) { - keyMap, err := obj.init.World.StrMapGet(obj.namespace) +func (obj *KVLookupFunc) buildMap(ctx context.Context) (types.Value, error) { + keyMap, err := obj.init.World.StrMapGet(ctx, obj.namespace) if err != nil { return nil, errwrap.Wrapf(err, "channel read failed on `%s`", obj.namespace) } diff --git a/lang/funcs/core/world/schedule_func.go b/lang/funcs/core/world/schedule_func.go index 8bf5a7c2..feeb1e4d 100644 --- a/lang/funcs/core/world/schedule_func.go +++ b/lang/funcs/core/world/schedule_func.go @@ -16,7 +16,7 @@ // along with this program. If not, see . 
// test with: -// time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl +// time ./mgmt run --hostname h1 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl // time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl // time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl // kill h2 (should see h1 and h3 pick [h1, h3] instead) diff --git a/lib/deploy.go b/lib/deploy.go index 55b9d46c..0e0dcb26 100644 --- a/lib/deploy.go +++ b/lib/deploy.go @@ -18,11 +18,13 @@ package lib import ( + "context" "fmt" "log" "os" - "github.com/purpleidea/mgmt/etcd" + "github.com/purpleidea/mgmt/etcd/client" + "github.com/purpleidea/mgmt/etcd/deployer" etcdfs "github.com/purpleidea/mgmt/etcd/fs" "github.com/purpleidea/mgmt/gapi" "github.com/purpleidea/mgmt/util/errwrap" @@ -34,12 +36,13 @@ import ( const ( // MetadataPrefix is the etcd prefix where all our fs superblocks live. - MetadataPrefix = etcd.NS + "/fs" + MetadataPrefix = "/fs" // StoragePrefix is the etcd prefix where all our fs data lives. - StoragePrefix = etcd.NS + "/storage" + StoragePrefix = "/storage" ) // deploy is the cli target to manage deploys to our cluster. +// TODO: add a timeout and/or cancel signal to replace context.TODO() func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { cliContext := c.Parent() if cliContext == nil { @@ -55,7 +58,12 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { debug = flags.Debug } } + Logf := func(format string, v ...interface{}) { + log.Printf("deploy: "+format, v...) + } + hello(program, version, flags) // say hello! + defer Logf("goodbye!") var hash, pHash string if !cliContext.Bool("no-git") { @@ -74,7 +82,7 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { } hash = head.Hash().String() // current commit id - log.Printf("deploy: hash: %s", hash) + Logf("hash: %s", hash) lo := &git.LogOptions{ From: head.Hash(), @@ -90,7 +98,7 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { if err == nil { // errors are okay, we might be empty pHash = commit.Hash.String() // previous commit id } - log.Printf("deploy: previous deploy hash: %s", pHash) + Logf("previous deploy hash: %s", pHash) if cliContext.Bool("force") { pHash = "" // don't check this :( } @@ -101,28 +109,58 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { uniqueid := uuid.New() // panic's if it can't generate one :P - etcdClient := &etcd.ClientEtcd{ - Seeds: cliContext.StringSlice("seeds"), // endpoints + etcdClient := client.NewClientFromSeedsNamespace( + cliContext.StringSlice("seeds"), // endpoints + NS, + ) + if err := etcdClient.Init(); err != nil { + return errwrap.Wrapf(err, "client Init failed") } - if err := etcdClient.Connect(); err != nil { - return errwrap.Wrapf(err, "client connection error") + defer func() { + err := errwrap.Wrapf(etcdClient.Close(), "client Close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("client cleanup error: %+v", err) + } + }() + + simpleDeploy := &deployer.SimpleDeploy{ + Client: etcdClient, + Debug: debug, + Logf: func(format string, v ...interface{}) { + Logf("deploy: "+format, v...) 
+ }, } - defer etcdClient.Destroy() + if err := simpleDeploy.Init(); err != nil { + return errwrap.Wrapf(err, "deploy Init failed") + } + defer func() { + err := errwrap.Wrapf(simpleDeploy.Close(), "deploy Close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("deploy cleanup error: %+v", err) + } + }() // get max id (from all the previous deploys) - max, err := etcd.GetMaxDeployID(etcdClient) + max, err := simpleDeploy.GetMaxDeployID(context.TODO()) if err != nil { return errwrap.Wrapf(err, "error getting max deploy id") } // find the latest id var id = max + 1 // next id - log.Printf("deploy: max deploy id: %d", max) + Logf("previous max deploy id: %d", max) etcdFs := &etcdfs.Fs{ - Client: etcdClient.GetClient(), + Client: etcdClient, // TODO: using a uuid is meant as a temporary measure, i hate them Metadata: MetadataPrefix + fmt.Sprintf("/deploy/%d-%s", id, uniqueid), DataPrefix: StoragePrefix, + + Debug: debug, + Logf: func(format string, v ...interface{}) { + Logf("fs: "+format, v...) + }, } cliInfo := &gapi.CliInfo{ @@ -154,9 +192,9 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error { } // this nominally checks the previous git hash matches our expectation - if err := etcd.AddDeploy(etcdClient, id, hash, pHash, &str); err != nil { + if err := simpleDeploy.AddDeploy(context.TODO(), id, hash, pHash, &str); err != nil { return errwrap.Wrapf(err, "could not create deploy id `%d`", id) } - log.Printf("deploy: success, id: %d", id) + Logf("success, id: %d", id) return nil } diff --git a/lib/hello.go b/lib/hello.go index 9355dd23..2ebf16b9 100644 --- a/lib/hello.go +++ b/lib/hello.go @@ -37,6 +37,7 @@ func hello(program, version string, flags Flags) { log.SetFlags(logFlags) // un-hijack from capnslog... + // XXX: move this to the etcd package when new version deprecates capnslog log.SetOutput(os.Stderr) if flags.Verbose { capnslog.SetFormatter(capnslog.NewLogFormatter(os.Stderr, "(etcd) ", logFlags)) diff --git a/lib/main.go b/lib/main.go index a378c712..4686d133 100644 --- a/lib/main.go +++ b/lib/main.go @@ -18,6 +18,7 @@ package lib import ( + "context" "fmt" "io/ioutil" "log" @@ -33,6 +34,8 @@ import ( "github.com/purpleidea/mgmt/engine/graph/autogroup" _ "github.com/purpleidea/mgmt/engine/resources" // let register's run "github.com/purpleidea/mgmt/etcd" + "github.com/purpleidea/mgmt/etcd/chooser" + "github.com/purpleidea/mgmt/etcd/deployer" "github.com/purpleidea/mgmt/gapi" "github.com/purpleidea/mgmt/gapi/empty" "github.com/purpleidea/mgmt/pgp" @@ -44,10 +47,14 @@ import ( etcdtypes "github.com/coreos/etcd/pkg/types" ) +const ( + // NS is the root namespace for etcd operations. All keys must use it! + NS = "/_mgmt" // must not end with a slash! +) + // Flags are some constant flags which are used throughout the program. type Flags struct { Debug bool // add additional log messages - Trace bool // add execution flow log messages Verbose bool // add extra log message output } @@ -105,7 +112,8 @@ type Main struct { Prometheus bool // enable prometheus metrics PrometheusListen string // prometheus instance bind specification - ge *graph.Engine + embdEtcd *etcd.EmbdEtcd // TODO: can be an interface in the future... 
+ ge *graph.Engine exit *util.EasyExit // exit signal cleanup []func() error // list of functions to run on close @@ -140,7 +148,7 @@ func (obj *Main) Init() error { obj.idealClusterSize = uint16(obj.IdealClusterSize) if obj.IdealClusterSize < 0 { // value is undefined, set to the default - obj.idealClusterSize = etcd.DefaultIdealClusterSize + obj.idealClusterSize = chooser.DefaultIdealDynamicSize } if obj.idealClusterSize < 1 { @@ -194,7 +202,8 @@ func (obj *Main) Run() error { hello(obj.Program, obj.Version, obj.Flags) // say hello! defer Logf("goodbye!") - defer obj.exit.Done(nil) // ensure this gets called even if Exit doesn't + exitCtx := obj.exit.Context() // local exit signal + defer obj.exit.Done(nil) // ensure this gets called even if Exit doesn't hostname, err := os.Hostname() // a sensible default // allow passing in the hostname, instead of using the system setting @@ -243,13 +252,14 @@ func (obj *Main) Run() error { if err := prom.InitKindMetrics(engine.RegisteredResourcesNames()); err != nil { return errwrap.Wrapf(err, "can't initialize kind-specific prometheus metrics") } - obj.cleanup = append(obj.cleanup, func() error { + defer func() { Logf("prometheus: stopping instance") - if err := prom.Stop(); err != nil { - return errwrap.Wrapf(err, "the prometheus instance exited poorly") + err := errwrap.Wrapf(prom.Stop(), "the prometheus instance exited poorly") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("cleanup error: %+v", err) } - return nil - }) + }() } if !obj.NoPgp { @@ -296,6 +306,8 @@ func (obj *Main) Run() error { exitchan := make(chan struct{}) // exit on close wg := &sync.WaitGroup{} // waitgroup for inner loop & goroutines + defer wg.Wait() // wait in case we have an early exit + defer obj.exit.Done(nil) // trigger exit in case something blocks // exit after `max-runtime` seconds for no reason at all... if i := obj.MaxRuntime; i > 0 { @@ -335,63 +347,108 @@ func (obj *Main) Run() error { // XXX: should this be moved to later in the code? go converger.Run(true) // main loop for converger, true to start paused converger.Ready() // block until ready - obj.cleanup = append(obj.cleanup, func() error { + defer func() { // TODO: shutdown converger, but make sure that using it in a // still running embdEtcd struct doesn't block waiting on it... converger.Shutdown() - return nil - }) + }() // embedded etcd if len(obj.seeds) == 0 { - Logf("etcd: seeds: no seeds specified!") + Logf("no seeds specified!") } else { - Logf("etcd: seeds(%d): %+v", len(obj.seeds), obj.seeds) + Logf("seeds(%d): %+v", len(obj.seeds), obj.seeds) } - embdEtcd := etcd.NewEmbdEtcd( - hostname, - obj.seeds, - obj.clientURLs, - obj.serverURLs, - obj.advertiseClientURLs, - obj.advertiseServerURLs, - obj.NoServer, - obj.NoNetwork, - obj.idealClusterSize, - etcd.Flags{ - Debug: obj.Flags.Debug, - Trace: obj.Flags.Trace, - Verbose: obj.Flags.Verbose, - }, - prefix, - converger, - ) - if embdEtcd == nil { - return fmt.Errorf("etcd: creation failed") - } else if err := embdEtcd.Startup(); err != nil { // startup (returns when etcd main loop is running) - return errwrap.Wrapf(err, "etcd: startup failed") - } - obj.cleanup = append(obj.cleanup, func() error { - // cleanup etcd main loop last so it can process everything first - err := embdEtcd.Destroy() // shutdown and cleanup etcd - return errwrap.Wrapf(err, "etcd: exited poorly") - }) + obj.embdEtcd = &etcd.EmbdEtcd{ + Hostname: hostname, + Seeds: obj.seeds, - // wait for etcd server to be ready before continuing... 
- // XXX: this is wrong if we're not going to be a server! we'll block!!! - // select { - // case <-embdEtcd.ServerReady(): - // Logf("etcd: server: ready!") - // // pass - // case <-time.After(((etcd.MaxStartServerTimeout * etcd.MaxStartServerRetries) + 1) * time.Second): - // return fmt.Errorf("etcd: startup timeout") - // } - time.Sleep(1 * time.Second) // XXX: temporary workaround + ClientURLs: obj.clientURLs, + ServerURLs: obj.serverURLs, + AClientURLs: obj.advertiseClientURLs, + AServerURLs: obj.advertiseServerURLs, + + NoServer: obj.NoServer, + NoNetwork: obj.NoNetwork, + + Chooser: &chooser.DynamicSize{ + IdealClusterSize: obj.idealClusterSize, + }, + + Converger: converger, + + NS: NS, // namespace + Prefix: fmt.Sprintf("%s/", path.Join(prefix, "etcd")), + + Debug: obj.Flags.Debug, + Logf: func(format string, v ...interface{}) { + log.Printf("etcd: "+format, v...) + }, + } + if err := obj.embdEtcd.Init(); err != nil { + return errwrap.Wrapf(err, "etcd init failed") + } + defer func() { + // cleanup etcd main loop last so it can process everything first + err := errwrap.Wrapf(obj.embdEtcd.Close(), "etcd close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("cleanup error: %+v", err) + } + }() + + var etcdErr error + // don't add a wait group here, this is done in embdEtcd.Destroy() + go func() { + etcdErr = obj.embdEtcd.Run() // returns when it shuts down... + obj.exit.Done(errwrap.Wrapf(etcdErr, "etcd run failed")) // trigger exit + }() + // tell etcd to shutdown, blocks until done! + // TODO: handle/report error? + defer obj.embdEtcd.Destroy() + + // wait for etcd to be ready before continuing... + // TODO: do we need to add a timeout here? + select { + case <-obj.embdEtcd.Ready(): + Logf("etcd is ready!") + // pass + + case <-obj.embdEtcd.Exited(): + Logf("etcd was destroyed!") + err := fmt.Errorf("etcd was destroyed on startup") + if etcdErr != nil { + err = etcdErr + } + return err + } + // TODO: should getting a client from EmbdEtcd already come with the NS? + etcdClient, err := obj.embdEtcd.MakeClientFromNamespace(NS) + if err != nil { + return errwrap.Wrapf(err, "make Client failed") + } + simpleDeploy := &deployer.SimpleDeploy{ + Client: etcdClient, + Debug: obj.Flags.Debug, + Logf: func(format string, v ...interface{}) { + log.Printf("deploy: "+format, v...) + }, + } + if err := simpleDeploy.Init(); err != nil { + return errwrap.Wrapf(err, "deploy Init failed") + } + defer func() { + err := errwrap.Wrapf(simpleDeploy.Close(), "deploy Close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("cleanup error: %+v", err) + } + }() // implementation of the World API (alternatives can be substituted in) world := &etcd.World{ Hostname: hostname, - EmbdEtcd: embdEtcd, + Client: etcdClient, MetadataPrefix: MetadataPrefix, StoragePrefix: StoragePrefix, StandaloneFs: obj.DeployFs, // used for static deploys @@ -415,9 +472,16 @@ func (obj *Main) Run() error { } if err := obj.ge.Init(); err != nil { - return errwrap.Wrapf(err, "engine: creation failed") + return errwrap.Wrapf(err, "engine Init failed") } - // After this point, the inner "main loop" must run, so that the engine + defer func() { + err := errwrap.Wrapf(obj.ge.Close(), "engine Close failed") + if err != nil { + // TODO: cause the final exit code to be non-zero + Logf("cleanup error: %+v", err) + } + }() + // After this point, the inner "main loop" will run, so that the engine // can get closed with the deploy close via the deploy chan shutdown... 
// main loop logic starts here @@ -456,7 +520,7 @@ func (obj *Main) Run() error { obj.ge.Pause(false) } // must be paused before this is run - obj.ge.Close() + //obj.ge.Close() // run in defer instead return // this is the only place we exit } @@ -678,9 +742,10 @@ func (obj *Main) Run() error { // get max id (from all the previous deploys) // this is what the existing cluster is already running - // TODO: can this block since we didn't deploy yet? - max, err := etcd.GetMaxDeployID(embdEtcd) + // TODO: add a timeout to context? + max, err := simpleDeploy.GetMaxDeployID(exitCtx) if err != nil { + close(deployChan) // because we won't close it downstream... return errwrap.Wrapf(err, "error getting max deploy id") } @@ -710,9 +775,24 @@ func (obj *Main) Run() error { // now we can wait for future deploys, but if we already had an // initial deploy from run, don't switch to this unless it's new + ctx, cancel := context.WithCancel(context.Background()) + watchChan, err := simpleDeploy.WatchDeploy(ctx) + if err != nil { + cancel() + Logf("error starting deploy: %+v", err) + return + } + wg.Add(1) + go func() { + defer wg.Done() + defer cancel() // unblock watch deploy + select { // wait until we're ready to shutdown + case <-exitchan: + } + }() + canceled := false + var last uint64 - startChan := make(chan struct{}) // start signal - close(startChan) // kick it off! for { if obj.NoDeployWatch && (obj.Deploy != nil || last > 0) { // block here, because when we close the @@ -725,29 +805,33 @@ func (obj *Main) Run() error { } select { - case <-startChan: // kick the loop once at start - startChan = nil // disable - - case err, ok := <-etcd.WatchDeploy(embdEtcd): + // WatchDeploy should send an initial event now... + case err, ok := <-watchChan: if !ok { - obj.exit.Done(nil) // regular shutdown + // TODO: is any of this needed in here? + if !canceled { + obj.exit.Done(nil) // regular shutdown + } return } + if err == context.Canceled { + canceled = true + continue // channel close is coming... + } if err != nil { // TODO: it broke, can we restart? - obj.exit.Done(fmt.Errorf("deploy: watch error")) - return + obj.exit.Done(errwrap.Wrapf(err, "deploy: watch error")) + continue } - startChan = nil // disable it early... if obj.Flags.Debug { Logf("deploy: got activity") } - case <-exitchan: - return + //case <-exitchan: + // return // exit via channel close instead } - latest, err := etcd.GetMaxDeployID(embdEtcd) // or zero + latest, err := simpleDeploy.GetMaxDeployID(ctx) // or zero if err != nil { Logf("error getting max deploy id: %+v", err) continue @@ -774,7 +858,7 @@ func (obj *Main) Run() error { // 0 passes through an empty deploy without an error... // (unless there is some sort of etcd error that occurs) - str, err := etcd.GetDeploy(embdEtcd, latest) + str, err := simpleDeploy.GetDeploy(ctx, latest) if err != nil { Logf("deploy: error getting deploy: %+v", err) continue @@ -871,6 +955,9 @@ func (obj *Main) FastExit(err error) { // might leave some of your resources in a partial or unknown state. 
func (obj *Main) Interrupt(err error) { // XXX: implement and run Interrupt API for supported resources - obj.FastExit(err) + + if obj.embdEtcd != nil { + obj.embdEtcd.Interrupt() // unblock borked clusters + } } diff --git a/lib/run.go b/lib/run.go index e30b66bf..cfdcd373 100644 --- a/lib/run.go +++ b/lib/run.go @@ -175,14 +175,19 @@ func run(c *cli.Context, name string, gapiObj gapi.GAPI) error { reterr := obj.Run() if reterr != nil { // log the error message returned - log.Printf("main: Error: %v", reterr) + if obj.Flags.Debug { + log.Printf("main: %+v", reterr) + } } if err := obj.Close(); err != nil { - log.Printf("main: Close: %v", err) + if obj.Flags.Debug { + log.Printf("main: Close: %+v", err) + } if reterr == nil { return err } + reterr = errwrap.Append(reterr, err) } return reterr diff --git a/main.go b/main.go index 7faea99a..cc035df4 100644 --- a/main.go +++ b/main.go @@ -27,7 +27,6 @@ import ( // These constants are some global variables that are used throughout the code. const ( Debug = false // add additional log messages - Trace = false // add execution flow log messages Verbose = false // add extra log message output ) @@ -40,7 +39,6 @@ var ( func main() { flags := mgmt.Flags{ Debug: Debug, - Trace: Trace, Verbose: Verbose, } if err := mgmt.CLI(program, version, flags); err != nil { diff --git a/misc/filter-golang-stack.py b/misc/filter-golang-stack.py index beba8cff..7de6f091 100755 --- a/misc/filter-golang-stack.py +++ b/misc/filter-golang-stack.py @@ -23,17 +23,25 @@ import sys -lines = sys.stdin.readlines() +if len(sys.argv) == 2 and sys.argv[1] != "-": + lines = open(sys.argv[1], "r").readlines() +else: + lines = sys.stdin.readlines() print("read: %d lines" % len(lines)) # find program start +start = -1 for i in range(len(lines)): line = lines[i] if line.startswith("PC="): start=i break +if start == -1: + print("could not find program start, looking for PC=???", file=sys.stderr) + sys.exit(1) + print("starts at line: %d" % (start+1)) # +1 because we're zero based def is_chunk(line): @@ -59,6 +67,18 @@ def filter_chunk(chunk): package_line = lines[1] if package_line.startswith("github.com/purpleidea/mgmt/vendor/"): return False + if package_line.startswith("github.com/") and not package_line.startswith("github.com/purpleidea/mgmt/"): + return False + if package_line.startswith("internal/poll"): + return False + if package_line.startswith("context.propagateCancel"): + return False + if package_line.startswith("runtime.gopark"): + return False + if package_line.startswith("runtime.futex"): + return False + if package_line.startswith("os/signal.signal_recv"): + return False return True diff --git a/test/shell/clustersize.sh b/test/shell/etcd-clustersize.sh similarity index 85% rename from test/shell/clustersize.sh rename to test/shell/etcd-clustersize.sh index 3d7cd50e..fbd613f7 100755 --- a/test/shell/clustersize.sh +++ b/test/shell/etcd-clustersize.sh @@ -10,7 +10,7 @@ if ! command -v etcdctl >/dev/null; then exit 0 fi -mkdir /tmp/mgmt/{A..E} +#mkdir /tmp/mgmt/{A..E} # kill servers on error/exit trap 'pkill -9 mgmt' EXIT @@ -22,7 +22,7 @@ $TIMEOUT "$MGMT" run --hostname h3 --tmp-prefix --no-pgp --seeds http://127.0.0. 
# wait for everything to converge sleep 30s -ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/idealClusterSize 3 +ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 3 $TIMEOUT "$MGMT" run --hostname h4 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 empty & $TIMEOUT "$MGMT" run --hostname h5 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2387 --server-urls http://127.0.0.1:2388 empty & @@ -32,7 +32,7 @@ sleep 30s test "$(ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 member list | wc -l)" -eq 3 -ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 put /_mgmt/idealClusterSize 5 +ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 5 # wait for everything to converge sleep 30s diff --git a/test/shell/etcd-conflicting-server.sh b/test/shell/etcd-conflicting-server.sh new file mode 100755 index 00000000..b11c4194 --- /dev/null +++ b/test/shell/etcd-conflicting-server.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +. "$(dirname "$0")/../util.sh" + +# run empty graphs, we're just testing etcd clustering +$TIMEOUT "$MGMT" run --no-pgp --tmp-prefix empty & +pid1=$! +sleep 15s # let it startup + +# run a second one that should conflict because a server is already running... +$TIMEOUT "$MGMT" run --no-pgp --tmp-prefix empty & +pid2=$! +wait $pid2 +e=$? +if [ $e -eq 0 ]; then + echo "second mgmt exited successfully when error was expected" + exit 1 +fi +if [ $e -ne 1 ]; then + echo "second mgmt exited with unexpected error of $e" + exit $e +fi + +$(kill -SIGINT $pid1)& # send ^C to exit 1st mgmt +wait $pid1 # get exit status +# if pid1 exits because of a timeout, then it blocked, and this is a bug! +exit $? diff --git a/test/shell/etcd-three-hosts-reversed.sh b/test/shell/etcd-three-hosts-reversed.sh new file mode 100755 index 00000000..23fb822e --- /dev/null +++ b/test/shell/etcd-three-hosts-reversed.sh @@ -0,0 +1,35 @@ +#!/bin/bash -e + +. "$(dirname "$0")/../util.sh" + +# run empty graphs, we're just testing etcd clustering +$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix empty & +pid1=$! +sleep 15s # let it startup + +$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix empty & +pid2=$! +sleep 15s + +$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix empty & +pid3=$! +sleep 15s + +$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt (reversed!) +wait $pid1 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt +wait $pid2 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid3)& # send ^C to exit 3rd mgmt (reversed!) +wait $pid3 # get exit status +# if pid3 exits because of a timeout, then it blocked, and this is a bug! +exit $? diff --git a/test/shell/etcd-two-hosts-reversed.sh b/test/shell/etcd-two-hosts-reversed.sh new file mode 100755 index 00000000..3123fb76 --- /dev/null +++ b/test/shell/etcd-two-hosts-reversed.sh @@ -0,0 +1,24 @@ +#!/bin/bash -e + +. "$(dirname "$0")/../util.sh" + +# run empty graphs, we're just testing etcd clustering +$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix empty & +pid1=$! 
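+# save the pid so we can send it SIGINT and check its exit status below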
+sleep 15s # let it startup + +$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix empty & +pid2=$! +sleep 15s + +$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt! (reverse!) +wait $pid1 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt (reverse!) +wait $pid2 # get exit status +# if pid2 exits because of a timeout, then it blocked, and this is a bug! +exit $? diff --git a/test/shell/exchange.sh b/test/shell/exchange.sh index 3ac9a951..c2a48836 100755 --- a/test/shell/exchange.sh +++ b/test/shell/exchange.sh @@ -5,18 +5,58 @@ set -o errexit set -o pipefail -$TIMEOUT "$MGMT" run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang exchange0.mcl & -$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang exchange0.mcl & -$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang exchange0.mcl & -$TIMEOUT "$MGMT" run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang exchange0.mcl & +$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix --no-pgp empty & +pid1=$! +sleep 10s +$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty & +pid2=$! +sleep 10s +$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty & +pid3=$! +sleep 10s +$TIMEOUT "$MGMT" run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty & +pid4=$! +sleep 10s +$TIMEOUT "$MGMT" deploy --no-git --seeds http://127.0.0.1:2379 lang --lang exchange0.mcl # kill servers on error/exit -trap 'pkill -9 mgmt' EXIT +#trap 'pkill -9 mgmt' EXIT # wait for everything to converge -sleep 10s +sleep 15s + +# debug +tail /tmp/mgmt/exchange-* test "$(cat /tmp/mgmt/exchange-* | grep -c h1)" -eq 4 test "$(cat /tmp/mgmt/exchange-* | grep -c h2)" -eq 4 test "$(cat /tmp/mgmt/exchange-* | grep -c h3)" -eq 4 test "$(cat /tmp/mgmt/exchange-* | grep -c h4)" -eq 4 + +$(sleep 15s && kill -SIGINT $pid4)& # send ^C to exit mgmt... +wait $pid4 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid3)& # send ^C to exit mgmt... +wait $pid3 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit mgmt... +wait $pid2 +e=$? +if [ $e -ne 0 ]; then + exit $e +fi + +$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit mgmt... +wait $pid1 +e=$? 
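+# h1 exits last; a non-zero status here means it didn't shut down cleanly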
+if [ $e -ne 0 ]; then + exit $e +fi diff --git a/test/shell/exchange0.mcl b/test/shell/exchange0.mcl index a6d1f3bf..9e855ce6 100644 --- a/test/shell/exchange0.mcl +++ b/test/shell/exchange0.mcl @@ -1,9 +1,10 @@ # run this example with these commands # watch -n 0.1 'tail *' # run this in /tmp/mgmt/ -# time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl -# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl +# time ./mgmt run --hostname h1 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty +# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty +# time ./mgmt deploy --no-git --seeds http://127.0.0.1:2379 lang --lang examples/lang/exchange0.mcl import "sys" import "world" diff --git a/test/test-govet.sh b/test/test-govet.sh index cd06d1d1..3f63ec6a 100755 --- a/test/test-govet.sh +++ b/test/test-govet.sh @@ -65,6 +65,9 @@ function consistent-imports() { if grep $'\t"github.com/purpleidea/mgmt/engine/util"' "$1"; then # import as engineUtil return 1 fi + if grep '"golang.org/x/net/context"' "$1"; then # use built-in context + return 1 + fi } # run go vet on a per-package basis diff --git a/util/sync_test.go b/util/sync_test.go index 229a00c6..00ce1f2e 100644 --- a/util/sync_test.go +++ b/util/sync_test.go @@ -86,7 +86,7 @@ func TestEasyAckOnce2(t *testing.T) { } } -func ExampleSubscribeSync() { +func ExampleSubscribedSignal() { fmt.Println("hello") x := &SubscribedSignal{} diff --git a/util/util.go b/util/util.go index e39c9da1..3c7facfd 100644 --- a/util/util.go +++ b/util/util.go @@ -430,6 +430,21 @@ func TimeAfterOrBlockCtx(ctx context.Context, t int) <-chan struct{} { return ch } +// CloseAfter takes a duration, similarly to `time.After`, and returns a channel +// that closes when either the context is done, or the duration expires. +func CloseAfter(ctx context.Context, d time.Duration) <-chan struct{} { + ch := make(chan struct{}) + go func() { + defer close(ch) + select { + case <-time.After(d): + // done + case <-ctx.Done(): + } + }() + return ch +} + // SystemBusPrivateUsable makes using the private bus usable. // TODO: should be upstream: https://github.com/godbus/dbus/issues/15 func SystemBusPrivateUsable() (conn *dbus.Conn, err error) { @@ -468,6 +483,26 @@ func SessionBusPrivateUsable() (conn *dbus.Conn, err error) { return conn, nil // success } +// PriorityStrSliceSort filters any elements matching fn to the end of the list. +// You can reverse the match result with a not to filter to the front instead! +// A copy of the list is returned, the original is not modified. 
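+// For example, with a fn that matches the prefix "bar", the list
+// ["bar1", "foo", "bar2"] sorts to ["foo", "bar1", "bar2"]; the relative
+// order within each group is preserved.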
+func PriorityStrSliceSort(input []string, fn func(string) bool) []string {
+	output := []string{}
+	found := []string{}
+	for _, x := range input {
+		if fn(x) { // if we find the key, don't include it just yet
+			found = append(found, x) // save for later
+			continue
+		}
+		output = append(output, x)
+	}
+
+	// include the keys at the end (if found)
+	output = append(output, found...)
+
+	return output
+}
+
 // SortedStrSliceCompare takes two lists of strings and returns whether or not
 // they are equivalent. It will return nil if both sets contain the same
 // elements, regardless of order, and an error if they do not.
diff --git a/util/util_test.go b/util/util_test.go
index acbbf87c..b80c5f9a 100644
--- a/util/util_test.go
+++ b/util/util_test.go
@@ -22,6 +22,7 @@ package util
 import (
 	"reflect"
 	"sort"
+	"strings"
 	"testing"
 )
 
@@ -1014,6 +1015,76 @@ func TestRemovePathPrefix0(t *testing.T) {
 	}
 }
 
+func TestPriorityStrSliceSort0(t *testing.T) {
+	in := []string{"foo", "bar", "baz"}
+	ex := []string{"bar", "baz", "foo"}
+
+	fn := func(x string) bool {
+		return x == "foo"
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
+func TestPriorityStrSliceSort1(t *testing.T) {
+	in := []string{"foo", "bar", "baz"}
+	ex := []string{"bar", "foo", "baz"}
+
+	fn := func(x string) bool {
+		return x != "bar" // != brings this key to the front
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
+func TestPriorityStrSliceSort2(t *testing.T) {
+	in := []string{"bar", "foo", "bar", "bar", "baz"}
+	ex := []string{"foo", "baz", "bar", "bar", "bar"}
+
+	fn := func(x string) bool {
+		return x == "bar"
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
+func TestPriorityStrSliceSort3(t *testing.T) {
+	in := []string{"foo", "bar1", "bar2", "bar3", "baz"}
+	ex := []string{"bar1", "bar2", "bar3", "foo", "baz"}
+
+	fn := func(x string) bool {
+		return !strings.HasPrefix(x, "bar")
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
+func TestPriorityStrSliceSort4(t *testing.T) {
+	in := []string{"foo", "bar1", "bar2", "bar3", "baz"}
+	ex := []string{"foo", "baz", "bar1", "bar2", "bar3"}
+
+	fn := func(x string) bool {
+		return strings.HasPrefix(x, "bar")
+	}
+	out := PriorityStrSliceSort(in, fn)
+
+	if !reflect.DeepEqual(ex, out) {
+		t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
+	}
+}
+
 func TestSortedStrSliceCompare0(t *testing.T) {
 	slice0 := []string{"foo", "bar", "baz"}
 	slice1 := []string{"bar", "foo", "baz"}
diff --git a/vendor/github.com/coreos/etcd b/vendor/github.com/coreos/etcd
index 27fc7e22..d57e8b8d 160000
--- a/vendor/github.com/coreos/etcd
+++ b/vendor/github.com/coreos/etcd
@@ -1 +1 @@
-Subproject commit 27fc7e2296f506182f58ce846e48f36b34fe6842
+Subproject commit d57e8b8d97adfc4a6c224fe116714bf1a1f3beb9
diff --git a/yamlgraph/gapi.go b/yamlgraph/gapi.go
index d433f5fe..cc065eff 100644
--- a/yamlgraph/gapi.go
+++ b/yamlgraph/gapi.go
@@ -18,6 +18,7 @@
 package yamlgraph
 
 import (
+	"context"
 	"fmt"
 	"sync"
 
@@ -166,6 +167,10 @@ func (obj *GAPI) Next() chan gapi.Next {
 			ch <- next
 			return
 		}
+		// FIXME: add timeout to context
+		ctx, cancel := context.WithCancel(context.Background())
+		defer cancel()
+
 		startChan := make(chan struct{}) // start signal
 		close(startChan)                 // kick it off!
 
@@ -173,7 +178,16 @@ func (obj *GAPI) Next() chan gapi.Next {
 		if obj.data.NoStreamWatch {
 			watchChan = nil
 		} else {
-			watchChan = obj.data.World.ResWatch()
+			var err error
+			watchChan, err = obj.data.World.ResWatch(ctx)
+			if err != nil {
+				next := gapi.Next{
+					Err:  errwrap.Wrapf(err, "%s: could not start watch", Name),
+					Exit: true, // exit, b/c programming error?
+				}
+				ch <- next
+				return
+			}
 		}
 
 		for {
diff --git a/yamlgraph/gconfig.go b/yamlgraph/gconfig.go
index d668fe5e..4f4d380f 100644
--- a/yamlgraph/gconfig.go
+++ b/yamlgraph/gconfig.go
@@ -19,6 +19,7 @@
 package yamlgraph
 
 import (
+	"context"
 	"fmt"
 	"strings"
 
@@ -168,6 +169,7 @@ func (obj *GraphConfig) Parse(data []byte) error {
 
 // NewGraphFromConfig transforms a GraphConfig struct into a new graph.
 // FIXME: remove any possibly left over, now obsolete graph diff code from here!
+// TODO: add a timeout to replace context.TODO()
 func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World, noop bool) (*pgraph.Graph, error) {
 	// hostname is the uuid for the host
 
@@ -224,7 +226,7 @@ func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World,
 	}
 
 	// store in backend (usually etcd)
-	if err := world.ResExport(resourceList); err != nil {
+	if err := world.ResExport(context.TODO(), resourceList); err != nil {
 		return nil, fmt.Errorf("Config: Could not export resources: %v", err)
 	}
 
@@ -239,7 +241,7 @@ func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World,
 	// database changes, we don't have a partial state of affairs...
 	if len(kindFilter) > 0 { // if kindFilter is empty, don't need to do lookups!
 		var err error
-		resourceList, err = world.ResCollect(hostnameFilter, kindFilter)
+		resourceList, err = world.ResCollect(context.TODO(), hostnameFilter, kindFilter)
 		if err != nil {
 			return nil, fmt.Errorf("Config: Could not collect resources: %v", err)
 		}