etcd: Rewrite embed etcd implementation

This is a giant cleanup of the etcd code. The earlier version was
written when I was less experienced with golang.

This is still not perfect, and does contain some races, but at least
it's a decent base to start from. The automatic elastic clustering
should be considered an experimental feature. If you need a more
battle-tested cluster, then you should manage etcd manually and point
mgmt at your existing cluster.
James Shubin
2018-05-05 17:35:08 -04:00
parent fb275d9537
commit a5842a41b2
56 changed files with 5459 additions and 2654 deletions

View File

@@ -215,23 +215,25 @@ requires a number of seconds as an argument.
./mgmt run lang --lang examples/lang/hello0.mcl --converged-timeout=5
```
### What does the error message about an inconsistent dataDir mean?
### On startup `mgmt` hangs after: `etcd: server: starting...`.
If you get an error message similar to:
```
Etcd: Connect: CtxError...
Etcd: CtxError: Reason: CtxDelayErr(5s): No endpoints available yet!
Etcd: Connect: Endpoints: []
Etcd: The dataDir (/var/lib/mgmt/etcd) might be inconsistent or corrupt.
etcd: server: starting...
etcd: server: start timeout of 1m0s reached
etcd: server: close timeout of 15s reached
```
This happens when there is a series of fatal connect errors in a row. It can
happen when you start `mgmt` using a dataDir that doesn't correspond to the
current cluster view. As a result, the embedded etcd server never finishes
starting up, and a default endpoint never gets added. The solution is to either
reconcile the mistake, or, if there is no important data saved, to remove the
etcd dataDir. This is typically `/var/lib/mgmt/etcd/member/`.
but nothing happens afterwards, then this can be due to a corrupt etcd storage
directory. Each etcd server embedded in mgmt must have a dedicated directory
where it stores local state. It must not be shared by more than one individual
member. This dir is typically `/var/lib/mgmt/etcd/member/`. If you accidentally
use it (for example during testing) with a different cluster view, then you can
corrupt it. This can happen if you use it with more than one hostname. The
solution is to avoid making this mistake, and if there is no important data
saved, you can remove the etcd member dir and start over.
### On running `make` to build a new version, it errors with: `Text file busy`.

View File

@@ -62,6 +62,13 @@ type Engine struct {
// If the struct does not validate, or it cannot initialize, then this errors.
// Initially it will contain an empty graph.
func (obj *Engine) Init() error {
if obj.Program == "" {
return fmt.Errorf("the Program is empty")
}
if obj.Hostname == "" {
return fmt.Errorf("the Hostname is empty")
}
var err error
if obj.graph, err = pgraph.NewGraph("graph"); err != nil {
return err

View File

@@ -0,0 +1,250 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package resources
import (
"context"
"fmt"
"sync"
"time"
"github.com/purpleidea/mgmt/engine"
"github.com/purpleidea/mgmt/engine/traits"
"github.com/purpleidea/mgmt/util"
"github.com/purpleidea/mgmt/util/errwrap"
)
func init() {
engine.RegisterResource("config:etcd", func() engine.Res { return &ConfigEtcdRes{} })
}
const (
sizeCheckApplyTimeout = 5 * time.Second
)
// ConfigEtcdRes is a resource that sets mgmt's etcd configuration.
type ConfigEtcdRes struct {
traits.Base // add the base methods without re-implementation
init *engine.Init
// IdealClusterSize is the requested minimum size of the cluster. If you
// set this to zero, it will cause a cluster wide shutdown if
// AllowSizeShutdown is true. If it's not true, then it will cause a
// validation error.
IdealClusterSize uint16 `lang:"idealclustersize"`
// AllowSizeShutdown is a required safety flag that you must set to true
// if you want to allow causing a cluster shutdown by setting
// IdealClusterSize to zero.
AllowSizeShutdown bool `lang:"allow_size_shutdown"`
// sizeFlag determines whether sizeCheckApply already ran or not.
sizeFlag bool
interruptChan chan struct{}
wg *sync.WaitGroup
}
// Default returns some sensible defaults for this resource.
func (obj *ConfigEtcdRes) Default() engine.Res {
return &ConfigEtcdRes{}
}
// Validate if the params passed in are valid data.
func (obj *ConfigEtcdRes) Validate() error {
// IdealClusterSize is a uint16, so it can never be negative.
if obj.IdealClusterSize == 0 && !obj.AllowSizeShutdown {
return fmt.Errorf("the IdealClusterSize can't be zero if AllowSizeShutdown is false")
}
return nil
}
// Init runs some startup code for this resource.
func (obj *ConfigEtcdRes) Init(init *engine.Init) error {
obj.init = init // save for later
obj.interruptChan = make(chan struct{})
obj.wg = &sync.WaitGroup{}
return nil
}
// Close is run by the engine to clean up after the resource is done.
func (obj *ConfigEtcdRes) Close() error {
obj.wg.Wait() // bonus
return nil
}
// Watch is the primary listener for this resource and it outputs events.
func (obj *ConfigEtcdRes) Watch() error {
obj.wg.Add(1)
defer obj.wg.Done()
// FIXME: add timeout to context
// The obj.init.Done channel is closed by the engine to signal shutdown.
ctx, cancel := util.ContextWithCloser(context.Background(), obj.init.Done)
defer cancel()
ch, err := obj.init.World.IdealClusterSizeWatch(util.CtxWithWg(ctx, obj.wg))
if err != nil {
return errwrap.Wrapf(err, "could not watch ideal cluster size")
}
obj.init.Running() // when started, notify engine that we're running
Loop:
for {
select {
case event, ok := <-ch:
if !ok {
break Loop
}
if obj.init.Debug {
obj.init.Logf("event: %+v", event)
}
// pass through and send an event
case <-obj.init.Done: // closed by the engine to signal shutdown
return nil
}
obj.init.Event() // notify engine of an event (this can block)
}
return nil
}
// sizeCheckApply sets the IdealClusterSize parameter. If it sees a value change
// to zero, then it *won't* try to change it away from zero, because it assumes
// that someone has requested a shutdown. If the value is seen on first startup,
// then it will change it, because it might be a zero from the previous cluster.
func (obj *ConfigEtcdRes) sizeCheckApply(apply bool) (bool, error) {
wg := &sync.WaitGroup{}
defer wg.Wait() // this must be above the defer cancel() call
ctx, cancel := context.WithTimeout(context.Background(), sizeCheckApplyTimeout)
defer cancel()
wg.Add(1)
go func() {
defer wg.Done()
select {
case <-obj.interruptChan:
cancel()
case <-ctx.Done():
// let this exit
}
}()
val, err := obj.init.World.IdealClusterSizeGet(ctx)
if err != nil {
return false, errwrap.Wrapf(err, "could not get ideal cluster size")
}
// if we got a value of zero, and we've already run before, then it's ok
if obj.IdealClusterSize != 0 && val == 0 && obj.sizeFlag {
obj.init.Logf("impending cluster shutdown, not setting ideal cluster size")
return true, nil // impending shutdown, don't try to cancel it.
}
obj.sizeFlag = true
// must be done after setting the above flag
if obj.IdealClusterSize == val { // state is correct
return true, nil
}
if !apply {
return false, nil
}
// set!
// This is run as a transaction so we detect if we needed to change it.
changed, err := obj.init.World.IdealClusterSizeSet(ctx, obj.IdealClusterSize)
if err != nil {
return false, errwrap.Wrapf(err, "could not set ideal cluster size")
}
if !changed {
return true, nil // we lost a race, which means no change needed
}
obj.init.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
return false, nil
}
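// The wait-group and cancel dance above could be factored out. What follows is
// a minimal sketch (an illustration only, not something this commit contains)
// of such a helper: it returns a context that is cancelled on timeout or as
// soon as the interrupt channel closes, plus a cleanup function that must be
// called (eg: deferred) to reclaim the helper goroutine.
func interruptibleContext(timeout time.Duration, interrupt <-chan struct{}) (context.Context, func()) {
	wg := &sync.WaitGroup{}
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	wg.Add(1)
	go func() {
		defer wg.Done()
		select {
		case <-interrupt:
			cancel() // unblock any pending operation early
		case <-ctx.Done():
			// normal exit, let this goroutine leave
		}
	}()
	return ctx, func() {
		cancel()  // cancel first so the goroutine can exit...
		wg.Wait() // ...then wait for it to do so
	}
}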
// CheckApply method for the ConfigEtcd resource. It applies the settings.
func (obj *ConfigEtcdRes) CheckApply(apply bool) (bool, error) {
checkOK := true
if c, err := obj.sizeCheckApply(apply); err != nil {
return false, err
} else if !c {
checkOK = false
}
// TODO: add more config settings management here...
//if c, err := obj.TODOCheckApply(apply); err != nil {
// return false, err
//} else if !c {
// checkOK = false
//}
return checkOK, nil // w00t
}
// Cmp compares two resources and returns an error if they are not equivalent.
func (obj *ConfigEtcdRes) Cmp(r engine.Res) error {
// we can only compare ConfigEtcdRes to others of the same resource kind
res, ok := r.(*ConfigEtcdRes)
if !ok {
return fmt.Errorf("not a %s", obj.Kind())
}
if obj.IdealClusterSize != res.IdealClusterSize {
return fmt.Errorf("the IdealClusterSize param differs")
}
if obj.AllowSizeShutdown != res.AllowSizeShutdown {
return fmt.Errorf("the AllowSizeShutdown param differs")
}
return nil
}
// Interrupt is called to ask the execution of this resource to end early.
func (obj *ConfigEtcdRes) Interrupt() error {
close(obj.interruptChan)
return nil
}
// UnmarshalYAML is the custom unmarshal handler for this struct.
// It is primarily useful for setting the defaults.
func (obj *ConfigEtcdRes) UnmarshalYAML(unmarshal func(interface{}) error) error {
type rawRes ConfigEtcdRes // indirection to avoid infinite recursion
def := obj.Default() // get the default
res, ok := def.(*ConfigEtcdRes) // put in the right format
if !ok {
return fmt.Errorf("could not convert to ConfigEtcdRes")
}
raw := rawRes(*res) // convert; the defaults go here
if err := unmarshal(&raw); err != nil {
return err
}
*obj = ConfigEtcdRes(raw) // restore from indirection with type conversion!
return nil
}
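// A hypothetical example (not part of this file) showing how the Validate
// rules above interact: a zero IdealClusterSize requests a cluster shutdown,
// and is only accepted together with the AllowSizeShutdown safety flag.
func exampleShutdownConfig() error {
	res := &ConfigEtcdRes{
		IdealClusterSize:  0,    // zero means cluster shutdown
		AllowSizeShutdown: true, // required safety flag for a zero size
	}
	return res.Validate() // returns nil, since the shutdown was allowed
}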

View File

@@ -18,11 +18,15 @@
package resources
import (
"context"
"fmt"
"strconv"
"sync"
"time"
"github.com/purpleidea/mgmt/engine"
"github.com/purpleidea/mgmt/engine/traits"
"github.com/purpleidea/mgmt/util"
"github.com/purpleidea/mgmt/util/errwrap"
)
@@ -39,6 +43,10 @@ const (
SkipCmpStyleString
)
const (
kvCheckApplyTimeout = 5 * time.Second
)
// KVRes is a resource which writes a key/value pair into cluster wide storage.
// It will ensure that the key is set to the requested value. The one exception
// is that if you use the SkipLessThan parameter, then it will only replace the
@@ -67,6 +75,8 @@ type KVRes struct {
// the value is greater when using the SkipLessThan parameter.
SkipCmpStyle KVResSkipCmpStyle `lang:"skipcmpstyle" yaml:"skipcmpstyle"`
interruptChan chan struct{}
// TODO: does it make sense to have different backends here? (eg: local)
}
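// For example (hypothetical values), with SkipLessThan set to true and
// SkipCmpStyleInt, a stored value of "7" would not be overwritten by a new
// value of "5", but it would be replaced by a new value of "9".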
@@ -107,6 +117,8 @@ func (obj *KVRes) Validate() error {
func (obj *KVRes) Init(init *engine.Init) error {
obj.init = init // save for later
obj.interruptChan = make(chan struct{})
return nil
}
@@ -117,9 +129,17 @@ func (obj *KVRes) Close() error {
// Watch is the primary listener for this resource and it outputs events.
func (obj *KVRes) Watch() error {
obj.init.Running() // when started, notify engine that we're running
// FIXME: add timeout to context
// The obj.init.Done channel is closed by the engine to signal shutdown.
ctx, cancel := util.ContextWithCloser(context.Background(), obj.init.Done)
defer cancel()
ch := obj.init.World.StrMapWatch(obj.getKey()) // get possible events!
ch, err := obj.init.World.StrMapWatch(ctx, obj.getKey()) // get possible events!
if err != nil {
return err
}
obj.init.Running() // when started, notify engine that we're running
var send = false // send event?
for {
@@ -191,13 +211,28 @@ func (obj *KVRes) lessThanCheck(value string) (bool, error) {
func (obj *KVRes) CheckApply(apply bool) (bool, error) {
obj.init.Logf("CheckApply(%t)", apply)
wg := &sync.WaitGroup{}
defer wg.Wait() // this must be above the defer cancel() call
ctx, cancel := context.WithTimeout(context.Background(), kvCheckApplyTimeout)
defer cancel()
wg.Add(1)
go func() {
defer wg.Done()
select {
case <-obj.interruptChan:
cancel()
case <-ctx.Done():
// let this exit
}
}()
if val, exists := obj.init.Recv()["Value"]; exists && val.Changed {
// if we received on Value, and it changed, wooo, nothing to do.
obj.init.Logf("CheckApply: `Value` was updated!")
}
hostname := obj.init.Hostname // me
keyMap, err := obj.init.World.StrMapGet(obj.getKey())
keyMap, err := obj.init.World.StrMapGet(ctx, obj.getKey())
if err != nil {
return false, errwrap.Wrapf(err, "check error during StrGet")
}
@@ -217,7 +252,7 @@ func (obj *KVRes) CheckApply(apply bool) (bool, error) {
return true, nil // nothing to delete, we're good!
} else if ok && obj.Value == nil { // delete
err := obj.init.World.StrMapDel(obj.getKey())
err := obj.init.World.StrMapDel(ctx, obj.getKey())
return false, errwrap.Wrapf(err, "apply error during StrDel")
}
@@ -225,7 +260,7 @@ func (obj *KVRes) CheckApply(apply bool) (bool, error) {
return false, nil
}
if err := obj.init.World.StrMapSet(obj.getKey(), *obj.Value); err != nil {
if err := obj.init.World.StrMapSet(ctx, obj.getKey(), *obj.Value); err != nil {
return false, errwrap.Wrapf(err, "apply error during StrSet")
}
@@ -261,6 +296,12 @@ func (obj *KVRes) Cmp(r engine.Res) error {
return nil
}
// Interrupt is called to ask the execution of this resource to end early.
func (obj *KVRes) Interrupt() error {
close(obj.interruptChan)
return nil
}
// KVUID is the UID struct for KVRes.
type KVUID struct {
engine.BaseUID

View File

@@ -18,6 +18,8 @@
package engine
import (
"context"
"github.com/purpleidea/mgmt/etcd/scheduler"
)
@@ -25,22 +27,26 @@ import (
// the GAPI to store state and exchange information throughout the cluster. It
// is the interface each machine uses to communicate with the rest of the world.
type World interface { // TODO: is there a better name for this interface?
ResWatch() chan error
ResExport([]Res) error
ResWatch(context.Context) (chan error, error)
ResExport(context.Context, []Res) error
// FIXME: should this method take a "filter" data struct instead of many args?
ResCollect(hostnameFilter, kindFilter []string) ([]Res, error)
ResCollect(ctx context.Context, hostnameFilter, kindFilter []string) ([]Res, error)
StrWatch(namespace string) chan error
IdealClusterSizeWatch(context.Context) (chan error, error)
IdealClusterSizeGet(context.Context) (uint16, error)
IdealClusterSizeSet(context.Context, uint16) (bool, error)
StrWatch(ctx context.Context, namespace string) (chan error, error)
StrIsNotExist(error) bool
StrGet(namespace string) (string, error)
StrSet(namespace, value string) error
StrDel(namespace string) error
StrGet(ctx context.Context, namespace string) (string, error)
StrSet(ctx context.Context, namespace, value string) error
StrDel(ctx context.Context, namespace string) error
// XXX: add the exchange primitives in here directly?
StrMapWatch(namespace string) chan error
StrMapGet(namespace string) (map[string]string, error)
StrMapSet(namespace, value string) error
StrMapDel(namespace string) error
StrMapWatch(ctx context.Context, namespace string) (chan error, error)
StrMapGet(ctx context.Context, namespace string) (map[string]string, error)
StrMapSet(ctx context.Context, namespace, value string) error
StrMapDel(ctx context.Context, namespace string) error
Scheduler(namespace string, opts ...scheduler.Option) (*scheduler.Result, error)

etcd/callback.go (new file, 497 lines)
View File

@@ -0,0 +1,497 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
import (
"context"
"fmt"
"sync"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/util"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
// nominateApply applies the changed watcher data onto our local caches.
func (obj *EmbdEtcd) nominateApply(data *interfaces.WatcherData) error {
if data == nil { // ignore empty data
return nil
}
// If we tried to look up the nominated members here (in etcd v3) this
// would sometimes block because we would lose the cluster leader once
// the current leader calls the MemberAdd API and it steps down trying
// to form a two host cluster. Instead, we can look at the event
// response data to read the nominated values! Since we only see what
// has *changed* in the response data, we have to keep track of the
// original state and apply the deltas. This must be idempotent in case
// it errors and is called again. If we're retrying and we get a data
// format error, it's probably not the end of the world.
nominated, err := applyDeltaEvents(data, obj.nominated) // map[hostname]URLs (URLsMap)
if err != nil && err != errInconsistentApply { // allow missing deletes
return err // unexpected error, fail
}
// TODO: do we want to sort this if it becomes a list instead of a map?
//sort.Strings(nominated) // deterministic order
obj.nominated = nominated
return nil
}
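// What follows is a minimal sketch (an illustration only, not the helper this
// commit actually ships) of what a delta-apply helper like applyDeltaEvents
// could look like. It assumes the WatcherData events carry the raw etcd
// Put/Delete events, that values are comma separated peer URLs, and that the
// etcdtypes and strings packages are imported. A delete for an unknown host
// returns errInconsistentApply so that callers may tolerate missed deletes.
func applyDeltaEventsSketch(data *interfaces.WatcherData, urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
	result := make(etcdtypes.URLsMap)
	for k, v := range urlsMap { // start from a copy of the previous state
		result[k] = v
	}
	for _, event := range data.Events {
		key := string(event.Kv.Key)
		host := key[len(data.Path):] // strip the watch path prefix
		switch event.Type {
		case etcd.EventTypePut:
			urls, err := etcdtypes.NewURLs(strings.Split(string(event.Kv.Value), ","))
			if err != nil {
				return nil, err
			}
			result[host] = urls // add or replace this host's entry
		case etcd.EventTypeDelete:
			if _, exists := result[host]; !exists {
				return result, errInconsistentApply // missing delete
			}
			delete(result, host)
		}
	}
	return result, nil
}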
// volunteerApply applies the changed watcher data onto our local caches.
func (obj *EmbdEtcd) volunteerApply(data *interfaces.WatcherData) error {
if data == nil { // ignore empty data
return nil
}
volunteers, err := applyDeltaEvents(data, obj.volunteers) // map[hostname]URLs (URLsMap)
if err != nil && err != errInconsistentApply { // allow missing deletes
return err // unexpected error, fail
}
// TODO: do we want to sort this if it becomes a list instead of a map?
//sort.Strings(volunteers) // deterministic order
obj.volunteers = volunteers
return nil
}
// endpointApply applies the changed watcher data onto our local caches. In this
// particular apply function, it also sets our client with the new endpoints.
func (obj *EmbdEtcd) endpointApply(data *interfaces.WatcherData) error {
if data == nil { // ignore empty data
return nil
}
endpoints, err := applyDeltaEvents(data, obj.endpoints) // map[hostname]URLs (URLsMap)
if err != nil && err != errInconsistentApply { // allow missing deletes
return err // unexpected error, fail
}
// is the endpoint list different?
if err := cmpURLsMap(obj.endpoints, endpoints); err != nil {
obj.endpoints = endpoints // set
// can happen if a server drops out for example
obj.Logf("endpoint list changed to: %+v", endpoints)
obj.setEndpoints()
}
return nil
}
// nominateCb runs to respond to the nomination list change events.
// Functionally, it controls the starting and stopping of the server process. If
// a nominate message is received for this machine, then it means it is already
// being added to the cluster with member add and the cluster is now waiting for
// it to start up. When a nominate entry is removed, it's up to this function to
// run the member remove right before it shuts its server down.
func (obj *EmbdEtcd) nominateCb(ctx context.Context) error {
// Ensure that only one copy of this function is run simultaneously.
// This is because we don't want to cause runServer to race with
// destroyServer. Let us completely start up before we can cancel it. As
// a special case, destroyServer itself can race against itself. I don't
// think it's possible for contention on this mutex, but we'll leave it
// in for safety.
obj.nominatedMutex.Lock()
defer obj.nominatedMutex.Unlock()
// This ordering mutex is being added for safety, since there is no good
// reason for this function and volunteerCb to run simultaneously, and
// it might be preventing a race condition that was happening.
obj.orderingMutex.Lock()
defer obj.orderingMutex.Unlock()
if obj.Debug {
obj.Logf("nominateCb")
defer obj.Logf("nominateCb: done!")
}
// check if i have actually volunteered first of all...
if obj.NoServer || len(obj.ServerURLs) == 0 {
obj.Logf("inappropriately nominated, rogue or stale server?")
// TODO: should we un-nominate ourself?
return nil // we've done our job successfully
}
// The nominated list can be empty here, eg: when we're shutting down.
if len(obj.nominated) == 0 {
obj.Logf("list of nominations is empty")
//return nil // don't exit, we might want to shutdown the server
} else {
obj.Logf("nominated: %v", obj.nominated)
}
// if there are no other peers, we create a new server
// TODO: do we need an || len(obj.nominated) == 0 if we're the first?
_, exists := obj.nominated[obj.Hostname] // am i nominated?
newCluster := len(obj.nominated) == 1 && exists
if obj.Debug {
obj.Logf("nominateCb: newCluster: %t; exists: %t; obj.server == nil: %t", newCluster, exists, obj.server == nil)
}
// TODO: server start retries should be handled inside of runServer...
if obj.serverAction(serverActionStart) { // start
// no server is running, but it should be
wg := &sync.WaitGroup{}
serverReady, ackReady := obj.ServerReady() // must call ack!
serverExited, ackExited := obj.ServerExited() // must call ack!
var sendError = false
var serverErr error
obj.Logf("waiting for server...")
nominated, err := copyURLsMap(obj.nominated)
if err != nil {
return err
}
wg.Add(1)
go func() {
defer wg.Done()
obj.errExitN = make(chan struct{})
defer close(obj.errExitN) // multi-signal for errChan close op
// blocks until server exits
serverErr = obj.runServer(newCluster, nominated)
// in case this exits on its own instead of with destroy
defer obj.destroyServer() // run to reset some values
if sendError && serverErr != nil { // exited with an error
obj.errChan <- errwrap.Wrapf(serverErr, "runServer errored")
}
}()
// block until either server is ready or an early exit occurs
select {
case <-serverReady:
// detach from our local return of errors from an early
// server exit (pre server ready) and switch to channel
sendError = true // gets set before the ackReady() does
ackReady() // must be called
ackExited() // must be called
// pass
case <-serverExited:
ackExited() // must be called
ackReady() // must be called
wg.Wait() // wait for server to finish to get early err
return serverErr
}
// Once the server is online, we *must* publish this information
// so that (1) others know where to connect to us (2) we provide
// an "event" for member add since there is not any event that's
// currently built-in to etcd and (3) so we have a key to expire
// when we shutdown or crash to give us the member remove event.
// please see issue: https://github.com/coreos/etcd/issues/5277
} else if obj.serverAction(serverActionStop) { // stop?
// server is running, but it should not be
// i have been un-nominated, remove self and shutdown server!
// we don't need to do a member remove if i'm the last one...
if len(obj.nominated) != 0 { // don't call if nobody left but me!
// work around: https://github.com/coreos/etcd/issues/5482
// and it might make sense to avoid it if we're the last
obj.Logf("member remove: removing self: %d", obj.serverID)
resp, err := obj.memberRemove(ctx, obj.serverID)
if err != nil {
if obj.Debug {
obj.Logf("error with member remove: %v", err)
}
return errwrap.Wrapf(err, "member remove error")
}
if resp != nil {
obj.Logf("member removed (self): %s (%d)", obj.Hostname, obj.serverID)
if err := obj.updateMemberState(resp.Members); err != nil {
return err
}
}
}
// FIXME: if we fail on destroy should we try to run some of the
// other cleanup tasks that usually afterwards (below) anyways ?
if err := obj.destroyServer(); err != nil { // sync until exited
return errwrap.Wrapf(err, "destroyServer errored")
}
// We close with this special sentinel only during destroy/exit.
if obj.closing {
return interfaces.ErrShutdown
}
}
return nil
}
// volunteerCb runs to respond to the volunteer list change events.
// Functionally, it controls the nominating and adding of members. It typically
// nominates a peer so that it knows it will get to be a server, which causes it
// to start up its server. It also runs the member add operation so that the
// cluster gets quorum safely. The member remove operation is typically run in
// the nominateCb of that server when it is asked to shutdown. This occurs when
// the nominate entry for that server is removed. If a server removes its
// volunteer entry we must respond by removing the nomination so that it can
// receive that message and shutdown.
// FIXME: we might need to respond to member change/disconnect/shutdown events,
// see: https://github.com/coreos/etcd/issues/5277
// XXX: Don't allow this function to partially run if it is canceled part way
// through... We don't want an inconsistent state where we did unnominate, but
// didn't remove a member...
// XXX: If the leader changes, do we need to kick the volunteerCb or anything
// else that might have required a leader and which returned because it did not
// have one, thus losing an event?
func (obj *EmbdEtcd) volunteerCb(ctx context.Context) error {
// Ensure that only one copy of this function is run simultaneously.
// It's not entirely clear if this can ever happen or if it's needed,
// but it's an inexpensive safety check that we can add in for now.
obj.volunteerMutex.Lock()
defer obj.volunteerMutex.Unlock()
// This ordering mutex is being added for safety, since there is no good
// reason for this function and nominateCb to run simultaneously, and it
// might be preventing a race condition that was happening.
obj.orderingMutex.Lock()
defer obj.orderingMutex.Unlock()
if obj.Debug {
obj.Logf("volunteerCb")
defer obj.Logf("volunteerCb: done!")
}
// FIXME: are there any situations where we don't want to short circuit
// here, such as if i'm the last node?
if obj.server == nil {
if obj.Debug {
obj.Logf("i'm not a server yet...")
}
return nil // if i'm not a server, i'm not a leader, return
}
// FIXME: Instead of checking this, assume yes, and use the
// `WithRequireLeader` wrapper, and just ignore the error from that if
// it's wrong... Combined with events that poke this volunteerCb when
// the leader changes, we shouldn't miss any events...
if isLeader, err := obj.isLeader(ctx); err != nil { // XXX: race!
return errwrap.Wrapf(err, "error determining leader")
} else if !isLeader {
if obj.Debug {
obj.Logf("we are not the leader...")
}
return nil
}
// i am the leader!
// Remember that the member* operations return the membership, so this
// means we don't need to run an extra memberList in those scenarios...
// However, this can get out of sync easily, so ensure that our member
// information is very recent.
if err := obj.memberStateFromList(ctx); err != nil {
return errwrap.Wrapf(err, "error during state sync")
}
// XXX: If we have any unstarted members here, do we want to reschedule
// this volunteerCb in a moment? Or will we get another event anyways?
// NOTE: There used to be an is_leader check right here...
// FIXME: Should we use WithRequireLeader instead? Here? Elsewhere?
// https://godoc.org/github.com/coreos/etcd/clientv3#WithRequireLeader
// FIXME: can this happen, and if so, is it an error or a pass-through?
if len(obj.volunteers) == 0 {
obj.Logf("list of volunteers is empty")
//return fmt.Errorf("volunteer list is empty")
} else {
obj.Logf("volunteers: %+v", obj.volunteers)
}
// TODO: do we really need to check these errors?
m, err := copyURLsMap(obj.membermap) // list of members...
if err != nil {
return err
}
v, err := copyURLsMap(obj.volunteers)
if err != nil {
return err
}
// Unnominate anyone that unvolunteers, so they can shutdown cleanly...
// FIXME: one step at a time... do we trigger subsequent steps somehow?
obj.Logf("chooser: (%+v)/(%+v)", m, v)
nominate, unnominate, err := obj.Chooser.Choose(m, v)
if err != nil {
return errwrap.Wrapf(err, "chooser error")
}
// Ensure that we are the *last* in the list if we're unnominating, and
// the *first* in the list if we're nominating. This way, we self-remove
// last, and we self-add first. This is least likely to hurt quorum.
headFn := func(x string) bool {
return x != obj.Hostname
}
tailFn := func(x string) bool {
return x == obj.Hostname
}
nominate = util.PriorityStrSliceSort(nominate, headFn)
unnominate = util.PriorityStrSliceSort(unnominate, tailFn)
obj.Logf("chooser result(+/-): %+v/%+v", nominate, unnominate)
var reterr error
leaderCtx := ctx // default ctx to use
if RequireLeaderCtx {
leaderCtx = etcd.WithRequireLeader(ctx) // FIXME: Is this correct?
}
for i := range nominate {
member := nominate[i]
peerURLs, exists := obj.volunteers[member] // comma separated list of urls
if !exists {
// if this happens, do we have an update race?
return fmt.Errorf("could not find member `%s` in volunteers map", member)
}
// NOTE: storing peerURLs when they're already in volunteers/ is
// redundant, but it seems to be necessary for a sane algorithm.
// nominate before we call the API so that members see it first!
if err := obj.nominate(leaderCtx, member, peerURLs); err != nil {
return errwrap.Wrapf(err, "error nominating: %s", member)
}
// XXX: can we add a ttl here, because once we nominate someone,
// we need to give them up to N seconds to start up after we run
// the MemberAdd API because if they don't, in some situations
// such as if we're adding the second node to the cluster, then
// we've lost quorum until a second member joins! If the TTL
// expires, we need to MemberRemove! In this special case, we
// need to forcefully remove the second member if we don't add
// them, because we'll be in a lack of quorum state and unable
// to do anything... As a result, we should always only add ONE
// member at a time!
// XXX: After we memberAdd, can we wait a timeout, and then undo
// the add if the member doesn't come up? We'd also need to run
// an unnominate too, and mark the node as temporarily failed...
obj.Logf("member add: %s: %v", member, peerURLs)
resp, err := obj.memberAdd(leaderCtx, peerURLs)
if err != nil {
// FIXME: On an error, this function needs to run again,
// because we need to make sure to add the member here!
return errwrap.Wrapf(err, "member add error")
}
if resp != nil { // if we're already the right state, we get nil
obj.Logf("member added: %s (%d): %v", member, resp.Member.ID, peerURLs)
if err := obj.updateMemberState(resp.Members); err != nil {
return err
}
if resp.Member.Name == "" { // not started instantly ;)
obj.addMemberState(member, resp.Member.ID, peerURLs, nil)
}
// TODO: would this ever happen or be necessary?
//if member == obj.Hostname {
// obj.addSelfState()
//}
}
}
// we must remove them from the members API or it will look like a crash
if l := len(unnominate); l > 0 {
obj.Logf("unnominated: shutting down %d members...", l)
}
for i := range unnominate {
member := unnominate[i]
memberID, exists := obj.memberIDs[member] // map[string]uint64
if !exists {
// if this happens, do we have an update race?
return fmt.Errorf("could not find member `%s` in memberIDs map", member)
}
// start a watcher to know if member was added
cancelCtx, cancel := context.WithCancel(leaderCtx)
defer cancel()
timeout := util.CloseAfter(cancelCtx, SelfRemoveTimeout) // chan closes
fn := func(members []*pb.Member) error {
for _, m := range members {
if m.Name == member || m.ID == memberID {
return fmt.Errorf("still present")
}
}
return nil // not found!
}
ch, err := obj.memberChange(cancelCtx, fn, MemberChangeInterval)
if err != nil {
return errwrap.Wrapf(err, "error watching for change of: %s", member)
}
if err := obj.nominate(leaderCtx, member, nil); err != nil { // unnominate
return errwrap.Wrapf(err, "error unnominating: %s", member)
}
// Once we issue the above unnominate, that peer will
// shutdown, and this might cause us to lose quorum,
// therefore, let that member remove itself, and then
// double check that it did happen in case delinquent.
// TODO: get built-in transactional member Add/Remove
// functionality to avoid a separate nominate list...
// If we're removing ourself, then let the (un)nominate callback
// do it. That way it removes itself cleanly on server shutdown.
if member == obj.Hostname { // remove in unnominate!
cancel()
obj.Logf("unnominate: removing self...")
continue
}
// cancel remove sleep and unblock early on event...
obj.Logf("waiting %s for %s to self remove...", SelfRemoveTimeout.String(), member)
select {
case <-timeout:
// pass
case err, ok := <-ch:
if ok {
<-timeout // wait until timeout finishes
reterr = errwrap.Append(reterr, err)
}
// removed quickly!
}
cancel()
// In case the removed member doesn't remove itself, do it!
resp, err := obj.memberRemove(leaderCtx, memberID)
if err != nil {
return errwrap.Wrapf(err, "member remove error")
}
if resp != nil {
obj.Logf("member removed (forced): %s (%d)", member, memberID)
if err := obj.updateMemberState(resp.Members); err != nil {
return err
}
// Do this I guess, but the TTL will eventually get it.
// Remove the other member to avoid client connections.
if err := obj.advertise(leaderCtx, member, nil); err != nil {
return err
}
}
// Remove the member from our lists to avoid blocking future
// possible MemberList calls which would try and connect to a
// missing member... The lists should get updated from the
// member exiting safely if it doesn't crash, but if it did
// and/or since it's a race to see if the update event will get
// seen before we need the new data, just do it now anyways.
// TODO: Is the above comment still true?
obj.rmMemberState(member) // proactively delete it
obj.Logf("member %s (%d) removed successfully!", member, memberID)
}
// NOTE: We could ensure that etcd reconnects here, but we can just wait
// for the endpoints callback which should see the state change instead.
obj.setEndpoints() // sync client with new endpoints
return reterr
}
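// util.CloseAfter is used above to bound how long we wait for a member to
// remove itself. The following is a minimal sketch of a compatible
// implementation (an assumption about its behaviour, not a copy of the util
// package, and it assumes the time package is imported): the returned channel
// closes once the duration elapses or the context is cancelled.
func closeAfterSketch(ctx context.Context, d time.Duration) <-chan struct{} {
	ch := make(chan struct{})
	go func() {
		defer close(ch) // closing the channel is the only signal sent
		select {
		case <-time.After(d):
		case <-ctx.Done():
		}
	}()
	return ch
}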

etcd/chooser/chooser.go (new file, 98 lines)
View File

@@ -0,0 +1,98 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package chooser
import (
"context"
"github.com/purpleidea/mgmt/etcd/interfaces"
etcdtypes "github.com/coreos/etcd/pkg/types"
)
// Data represents the input data that is passed to the chooser.
type Data struct {
// Hostname is the hostname running this chooser instance. It can be
// used as a unique key in the cluster.
Hostname string // ourself
Debug bool
Logf func(format string, v ...interface{})
}
// Chooser represents the interface you must implement if you want to be able to
// control which cluster members are added and removed. Remember that this can
// get run from any peer (server) machine in the cluster, and that this may
// change as different leaders are elected! Do not assume any state will remain
// between invocations. If you want to maintain hysteresis or state, make sure
// to synchronize it in etcd.
type Chooser interface {
// Validate validates the chooser implementation to ensure the params
// represent a valid instantiation.
Validate() error
// Init initializes the chooser and passes in some useful data and
// handles.
Init(*Data) error
// Connect will be called with a client interfaces.Client that you can
// use if necessary to store some shared state between instances of this
// and watch for external changes. Sharing state between members should
// be avoided if possible, and there is no guarantee that your data
// won't be deleted in a disaster. There are no backups for this,
// regenerate anything you might need. Additionally, this may only be
// used inside the Chooser method, since Connect is only called after
// Init. This is however very useful for implementing special choosers.
// Since some operations can run on connect, it gets a context. If you
// cancel this context, then you might expect that Watch could die too.
// Both of these should get cancelled if you call Disconnect.
Connect(context.Context, interfaces.Client) error // we get given a namespaced client
// Disconnect tells us to cancel our use of the client interface that we
// got from the Connect method. We must not return until we're done.
Disconnect() error
// Watch is called by the engine to allow us to Watch for changes that
// might cause us to want to re-evaluate our nomination decision. It
// should error if it cannot startup. Once it is running, it should send
// a nil error on every event, and an error if things go wrong. When
// Disconnect is called, then that should cause this to exit. When
// this sends events, Choose will usually eventually get called in
// response.
Watch() (chan error, error)
// Choose takes the current peer membership state, and the available
// volunteers, and produces a list of who we should add and who should
// quit. In general, it's best to only remove one member at a time, in
// particular because this will get called iteratively on future events,
// and it can remove subsequent members on the next iteration. One
// important note: when building a new cluster from exactly one available
// volunteer and no members, we assume that this first volunteer gets
// selected. Make sure that any implementation of this function does this
// as well, since otherwise the hardcoded initial assumption would be
// proven wrong here!
// TODO: we could pass in two lists of hostnames instead of the full
// URLsMap here, but let's keep the more complicated form for now, and
// reduce it down later if needed...
// TODO: should we add a step arg here ?
Choose(membership, volunteers etcdtypes.URLsMap) (nominees, quitters []string, err error)
// Close runs some cleanup routines in case there is anything that you'd
// like to free after we're done.
Close() error
}

etcd/chooser/dynamicsize.go (new file, 285 lines)
View File

@@ -0,0 +1,285 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package chooser
import (
"context"
"fmt"
"strconv"
"sync"
"github.com/purpleidea/mgmt/etcd/interfaces"
etcd "github.com/coreos/etcd/clientv3"
etcdtypes "github.com/coreos/etcd/pkg/types"
)
// XXX: Test causing cluster shutdowns with:
// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 0
// It is currently broken.
const (
// DefaultIdealDynamicSize is the default target ideal dynamic cluster
// size used for the initial cluster.
DefaultIdealDynamicSize = 5
// IdealDynamicSizePath is the path key used for the chooser. It usually
// gets used with a namespace prefix.
IdealDynamicSizePath = "/dynamicsize/idealclustersize"
)
// DynamicSize is a simple implementation of the Chooser interface. This helps
// select which machines to add and remove as we elastically grow and shrink our
// cluster.
// TODO: think of a better name
type DynamicSize struct {
// IdealClusterSize is the ideal target size for this cluster. If it is
// set to zero, then it will use DefaultIdealDynamicSize as the value.
IdealClusterSize uint16
data *Data // save for later
client interfaces.Client
ctx context.Context
cancel func()
wg *sync.WaitGroup
}
// Validate validates the struct.
func (obj *DynamicSize) Validate() error {
// TODO: if changed to zero, treat as a cluster shutdown signal
// IdealClusterSize is a uint16, so it can never be negative. A value of
// zero is replaced with DefaultIdealDynamicSize in Init.
return nil
}
// Init accepts some useful data and handles.
func (obj *DynamicSize) Init(data *Data) error {
if data.Hostname == "" {
return fmt.Errorf("can't Init with empty Hostname value")
}
if data.Logf == nil {
return fmt.Errorf("no Logf function was specified")
}
if obj.IdealClusterSize == 0 {
obj.IdealClusterSize = DefaultIdealDynamicSize
}
obj.data = data
obj.wg = &sync.WaitGroup{}
return nil
}
// Close runs some cleanup routines.
func (obj *DynamicSize) Close() error {
return nil
}
// Connect is called to accept an etcd.KV namespace that we can use.
func (obj *DynamicSize) Connect(ctx context.Context, client interfaces.Client) error {
obj.client = client
obj.ctx, obj.cancel = context.WithCancel(ctx)
size, err := DynamicSizeGet(obj.ctx, obj.client)
if err == interfaces.ErrNotExist || (err == nil && size <= 0) {
// unset, set in running cluster
changed, err := DynamicSizeSet(obj.ctx, obj.client, obj.IdealClusterSize)
if err == nil && changed {
obj.data.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
}
return err
} else if err == nil && size >= 1 {
// already set; get from running cluster (use the valid cluster value)
if obj.IdealClusterSize != size {
obj.data.Logf("using dynamic cluster size of: %d", size)
}
obj.IdealClusterSize = size // get from existing cluster...
}
return err
}
// Disconnect is called to cancel our use of the etcd.KV connection.
func (obj *DynamicSize) Disconnect() error {
if obj.client != nil { // if connect was not called, don't call this...
obj.cancel()
}
obj.wg.Wait()
return nil
}
// Watch is called to send events anytime we might want to change membership. It
// is also used to watch for changes so that when we get an event, we know to
// honour the change in Choose.
func (obj *DynamicSize) Watch() (chan error, error) {
// NOTE: The body of this function is very similar to the logic in the
// simple client.Watcher implementation that wraps ComplexWatcher.
path := IdealDynamicSizePath
cancelCtx, cancel := context.WithCancel(obj.ctx)
info, err := obj.client.ComplexWatcher(cancelCtx, path)
if err != nil {
defer cancel()
return nil, err
}
ch := make(chan error)
obj.wg.Add(1) // hook in to global wait group
go func() {
defer obj.wg.Done()
defer close(ch)
defer cancel()
var data *interfaces.WatcherData
var ok bool
for {
select {
case data, ok = <-info.Events: // read
if !ok {
return
}
case <-cancelCtx.Done():
continue // wait for ch closure, but don't block
}
size := obj.IdealClusterSize
for _, event := range data.Events { // apply each event
if event.Type != etcd.EventTypePut {
continue
}
key := string(event.Kv.Key)
key = key[len(data.Path):] // remove path prefix
val := string(event.Kv.Value)
if val == "" {
continue // ignore empty values
}
i, err := strconv.Atoi(val)
if err != nil {
continue // ignore bad values
}
size = uint16(i) // save
}
if size == obj.IdealClusterSize {
continue // no change
}
// set before sending the signal
obj.IdealClusterSize = size
if size == 0 { // zero means shutdown
obj.data.Logf("impending cluster shutdown...")
} else {
obj.data.Logf("got new dynamic cluster size of: %d", size)
}
select {
case ch <- data.Err: // send (might be nil!)
case <-cancelCtx.Done():
continue // wait for ch closure, but don't block
}
}
}()
return ch, nil
}
// Choose accepts a list of current membership, and a list of volunteers. From
// that we can decide who we should add and remove. We return a list of those
// nominated, and those unnominated, respectively.
func (obj *DynamicSize) Choose(membership, volunteers etcdtypes.URLsMap) ([]string, []string, error) {
// Possible nominees include anyone that has volunteered, but that
// isn't a member.
if obj.data.Debug {
obj.data.Logf("goal: %d members", obj.IdealClusterSize)
}
nominees := []string{}
for hostname := range volunteers {
if _, exists := membership[hostname]; !exists {
nominees = append(nominees, hostname)
}
}
// Possible quitters include anyone that is a member, but that is not a
// volunteer. (They must have unvolunteered.)
quitters := []string{}
for hostname := range membership {
if _, exists := volunteers[hostname]; !exists {
quitters = append(quitters, hostname)
}
}
// What we want to know...
nominated := []string{}
unnominated := []string{}
// We should always only add ONE member at a time!
// TODO: is it okay to remove multiple members at the same time?
if len(nominees) > 0 && len(membership)-len(quitters) < int(obj.IdealClusterSize) {
//unnominated = []string{} // only do one operation at a time
nominated = []string{nominees[0]} // FIXME: use a better picker algorithm
} else if len(quitters) == 0 && len(membership) > int(obj.IdealClusterSize) { // too many members
//nominated = []string{} // only do one operation at a time
for kicked := range membership {
// don't kick ourself unless we are the only one left...
if kicked != obj.data.Hostname || (obj.IdealClusterSize == 0 && len(membership) == 1) {
unnominated = []string{kicked} // FIXME: use a better picker algorithm
break
}
}
} else if len(quitters) > 0 { // must do these before new unvolunteers
unnominated = quitters // get rid of the quitters
}
return nominated, unnominated, nil // perform these changes
}
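// A hypothetical worked example (not part of this file) of the logic above:
// with two members, three volunteers and an ideal size of three, exactly the
// one missing volunteer gets nominated, and nobody is asked to quit. The
// hostnames and URLs are invented for illustration.
func chooseExampleSketch() error {
	membership, err := etcdtypes.NewURLsMap("h1=http://127.0.0.1:2380,h2=http://127.0.0.2:2380")
	if err != nil {
		return err
	}
	volunteers, err := etcdtypes.NewURLsMap("h1=http://127.0.0.1:2380,h2=http://127.0.0.2:2380,h3=http://127.0.0.3:2380")
	if err != nil {
		return err
	}
	chooser := &DynamicSize{IdealClusterSize: 3}
	data := &Data{
		Hostname: "h1", // we are one of the existing members
		Logf:     func(format string, v ...interface{}) {},
	}
	if err := chooser.Init(data); err != nil {
		return err
	}
	nominated, unnominated, err := chooser.Choose(membership, volunteers)
	// expect: nominated == ["h3"], unnominated == []
	_, _ = nominated, unnominated
	return err
}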
// DynamicSizeGet gets the currently set dynamic size set in the cluster.
func DynamicSizeGet(ctx context.Context, client interfaces.Client) (uint16, error) {
key := IdealDynamicSizePath
m, err := client.Get(ctx, key) // (map[string]string, error)
if err != nil {
return 0, err
}
val, exists := m[IdealDynamicSizePath]
if !exists {
return 0, interfaces.ErrNotExist
}
i, err := strconv.Atoi(val)
if err != nil {
return 0, fmt.Errorf("bad value")
}
return uint16(i), nil
}
// DynamicSizeSet sets the dynamic size in the cluster. It returns true if it
// changed or set the value.
func DynamicSizeSet(ctx context.Context, client interfaces.Client, size uint16) (bool, error) {
key := IdealDynamicSizePath
val := strconv.FormatUint(uint64(size), 10) // fmt.Sprintf("%d", size)
ifCmps := []etcd.Cmp{
etcd.Compare(etcd.Value(key), "=", val), // desired state
}
elseOps := []etcd.Op{etcd.OpPut(key, val)}
resp, err := client.Txn(ctx, ifCmps, nil, elseOps)
if err != nil {
return false, err
}
// succeeded is set to true if the compare evaluated to true
changed := !resp.Succeeded
return changed, err
}
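// A hypothetical usage sketch (not part of this file) tying the two helpers
// above together: read the current target size and grow the cluster by one
// member, tolerating a value that was never set.
func growClusterSketch(ctx context.Context, client interfaces.Client) error {
	size, err := DynamicSizeGet(ctx, client)
	if err == interfaces.ErrNotExist {
		size = DefaultIdealDynamicSize // nothing stored yet, use the default
	} else if err != nil {
		return err
	}
	changed, err := DynamicSizeSet(ctx, client, size+1)
	if err != nil {
		return err
	}
	_ = changed // true only if the transaction actually wrote a new value
	return nil
}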

View File

@@ -1,95 +0,0 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
import (
"time"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
context "golang.org/x/net/context"
)
// ClientEtcd provides a simple etcd client for deploy and status operations.
type ClientEtcd struct {
Seeds []string // list of endpoints to try to connect
client *etcd.Client
}
// GetClient returns a handle to the raw etcd client object.
func (obj *ClientEtcd) GetClient() *etcd.Client {
return obj.client
}
// GetConfig returns the config struct to be used for the etcd client connect.
func (obj *ClientEtcd) GetConfig() etcd.Config {
cfg := etcd.Config{
Endpoints: obj.Seeds,
// RetryDialer chooses the next endpoint to use
// it comes with a default dialer if unspecified
DialTimeout: 5 * time.Second,
}
return cfg
}
// Connect connects the client to a server, and then builds the *API structs.
// If reconnect is true, it will force a reconnect with new config endpoints.
func (obj *ClientEtcd) Connect() error {
if obj.client != nil { // memoize
return nil
}
var err error
cfg := obj.GetConfig()
obj.client, err = etcd.New(cfg) // connect!
if err != nil {
return errwrap.Wrapf(err, "client connect error")
}
return nil
}
// Destroy cleans up the entire etcd client connection.
func (obj *ClientEtcd) Destroy() error {
err := obj.client.Close()
//obj.wg.Wait()
return err
}
// Get runs a get on the client connection. This has the same signature as our
// EmbdEtcd Get function.
func (obj *ClientEtcd) Get(path string, opts ...etcd.OpOption) (map[string]string, error) {
resp, err := obj.client.Get(context.TODO(), path, opts...)
if err != nil || resp == nil {
return nil, err
}
// TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
result := make(map[string]string)
for _, x := range resp.Kvs {
result[string(x.Key)] = string(x.Value)
}
return result, nil
}
// Txn runs a transaction on the client connection. This has the same signature
// as our EmbdEtcd Txn function.
func (obj *ClientEtcd) Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) {
return obj.client.KV.Txn(context.TODO()).If(ifcmps...).Then(thenops...).Else(elseops...).Commit()
}

View File

@@ -15,60 +15,43 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
package resources
import (
"context"
"fmt"
"log"
"strings"
"github.com/purpleidea/mgmt/engine"
engineUtil "github.com/purpleidea/mgmt/engine/util"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/util"
etcd "github.com/coreos/etcd/clientv3"
)
const (
ns = "" // in case we want to add one back in
)
// WatchResources returns a channel that outputs events when exported resources
// change.
// TODO: Filter our watch (on the server side if possible) based on the
// collection prefixes and filters that we care about...
func WatchResources(obj *EmbdEtcd) chan error {
ch := make(chan error, 1) // buffer it so we can measure it
path := fmt.Sprintf("%s/exported/", NS)
callback := func(re *RE) error {
// TODO: is this even needed? it used to happen on conn errors
log.Printf("Etcd: Watch: Path: %v", path) // event
if re == nil || re.response.Canceled {
return fmt.Errorf("watch is empty") // will cause a CtxError+retry
}
// we normally need to check if anything changed since the last
// event, since a set (export) with no changes still causes the
// watcher to trigger and this would cause an infinite loop. we
// don't need to do this check anymore because we do the export
// transactionally, and only if a change is needed. since it is
// atomic, all the changes arrive together which avoids dupes!!
if len(ch) == 0 { // send event only if one isn't pending
// this check avoids multiple events all queueing up and then
// being released continuously long after the changes stopped
// do not block!
ch <- nil // event
}
return nil
}
_, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors
return ch
func WatchResources(ctx context.Context, client interfaces.Client) (chan error, error) {
path := fmt.Sprintf("%s/exported/", ns)
return client.Watcher(ctx, path, etcd.WithPrefix())
}
// SetResources exports all of the resources which we pass in to etcd.
func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) error {
func SetResources(ctx context.Context, client interfaces.Client, hostname string, resourceList []engine.Res) error {
// key structure is $NS/exported/$hostname/resources/$uid = $data
var kindFilter []string // empty to get from everyone
hostnameFilter := []string{hostname}
// this is not a race because we should only be reading keys which we
// set, and there should not be any contention with other hosts here!
originals, err := GetResources(obj, hostnameFilter, kindFilter)
originals, err := GetResources(ctx, client, hostnameFilter, kindFilter)
if err != nil {
return err
}
@@ -81,10 +64,10 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err
ops := []etcd.Op{} // list of ops in this transaction
for _, res := range resourceList {
if res.Kind() == "" {
log.Fatalf("Etcd: SetResources: Error: Empty kind: %v", res.Name())
return fmt.Errorf("empty kind: %s", res.Name())
}
uid := fmt.Sprintf("%s/%s", res.Kind(), res.Name())
path := fmt.Sprintf("%s/exported/%s/resources/%s", NS, hostname, uid)
path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid)
if data, err := engineUtil.ResToB64(res); err == nil {
ifs = append(ifs, etcd.Compare(etcd.Value(path), "=", data)) // desired state
ops = append(ops, etcd.OpPut(path, data))
@@ -106,10 +89,10 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err
// delete old, now unused resources here...
for _, res := range originals {
if res.Kind() == "" {
log.Fatalf("Etcd: SetResources: Error: Empty kind: %v", res.Name())
return fmt.Errorf("empty kind: %s", res.Name())
}
uid := fmt.Sprintf("%s/%s", res.Kind(), res.Name())
path := fmt.Sprintf("%s/exported/%s/resources/%s", NS, hostname, uid)
path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid)
if match(res, resourceList) { // if we match, no need to delete!
continue
@@ -124,9 +107,9 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err
// it's important to do this in one transaction, and atomically, because
// this way, we only generate one watch event, and only when it's needed
if hasDeletes { // always run, ifs don't matter
_, err = obj.Txn(nil, ops, nil) // TODO: does this run? it should!
_, err = client.Txn(ctx, nil, ops, nil) // TODO: does this run? it should!
} else {
_, err = obj.Txn(ifs, nil, ops) // TODO: do we need to look at response?
_, err = client.Txn(ctx, ifs, nil, ops) // TODO: do we need to look at response?
}
return err
}
@@ -136,11 +119,11 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err
// TODO: Expand this with a more powerful filter based on what we eventually
// support in our collect DSL. Ideally a server side filter like WithFilter()
// We could do this if the pattern was $NS/exported/$kind/$hostname/$uid = $data.
func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine.Res, error) {
func GetResources(ctx context.Context, client interfaces.Client, hostnameFilter, kindFilter []string) ([]engine.Res, error) {
// key structure is $NS/exported/$hostname/resources/$uid = $data
path := fmt.Sprintf("%s/exported/", NS)
path := fmt.Sprintf("%s/exported/", ns)
resourceList := []engine.Res{}
keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
keyMap, err := client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
if err != nil {
return nil, fmt.Errorf("could not get resources: %v", err)
}
@@ -160,7 +143,9 @@ func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine.
if kind == "" {
return nil, fmt.Errorf("unexpected kind chunk")
}
if name == "" { // TODO: should I check this?
return nil, fmt.Errorf("unexpected empty name")
}
// FIXME: ideally this would be a server side filter instead!
if len(hostnameFilter) > 0 && !util.StrInList(hostname, hostnameFilter) {
continue
@@ -171,9 +156,9 @@ func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine.
continue
}
if obj, err := engineUtil.B64ToRes(val); err == nil {
log.Printf("Etcd: Get: (Hostname, Kind, Name): (%s, %s, %s)", hostname, kind, name)
resourceList = append(resourceList, obj)
if res, err := engineUtil.B64ToRes(val); err == nil {
//obj.Logf("Get: (Hostname, Kind, Name): (%s, %s, %s)", hostname, kind, name)
resourceList = append(resourceList, res)
} else {
return nil, fmt.Errorf("can't convert from B64: %v", err)
}
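
The exported key layout used above is `$NS/exported/$hostname/resources/$kind/$name = $data`. A minimal sketch of how the path gets assembled, with a hypothetical hostname and resource:

```go
package main

import "fmt"

func main() {
	ns := ""                   // namespace prefix, assumed empty here
	hostname := "h1"           // hypothetical exporting host
	kind, name := "file", "f1" // hypothetical resource kind and name
	uid := fmt.Sprintf("%s/%s", kind, name)
	path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid)
	fmt.Println(path) // prints: /exported/h1/resources/file/f1
}
```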

484
etcd/client/simple.go Normal file
View File

@@ -0,0 +1,484 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package client
import (
"context"
"fmt"
"sync"
"time"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
"github.com/coreos/etcd/clientv3/namespace"
)
// method represents the method we used to build the simple client.
type method uint8
const (
methodError method = iota
methodSeeds
methodClient
methodNamespace
)
// NewClientFromSeeds builds a new simple client by connecting to a list of
// seeds.
func NewClientFromSeeds(seeds []string) *Simple {
return &Simple{
method: methodSeeds,
wg: &sync.WaitGroup{},
seeds: seeds,
}
}
// NewClientFromSeedsNamespace builds a new simple client by connecting to a
// list of seeds and ensuring all key access is prefixed with a namespace.
func NewClientFromSeedsNamespace(seeds []string, ns string) *Simple {
return &Simple{
method: methodSeeds,
wg: &sync.WaitGroup{},
seeds: seeds,
namespace: ns,
}
}
// NewClientFromClient builds a new simple client by taking an existing client
// struct. It does not disconnect this when Close is called, as that is up to
// the parent, which is the owner of that client input struct.
func NewClientFromClient(client *etcd.Client) *Simple {
return &Simple{
method: methodClient,
wg: &sync.WaitGroup{},
client: client,
}
}
// NewClientFromNamespaceStr builds a new simple client by taking an existing
// client and a string namespace. Warning, this doesn't properly nest the
// namespaces.
func NewClientFromNamespaceStr(client *etcd.Client, ns string) *Simple {
if client == nil {
return &Simple{
method: methodError,
err: fmt.Errorf("client is nil"),
}
}
kv := client.KV
w := client.Watcher
if ns != "" { // only layer if not empty
kv = namespace.NewKV(client.KV, ns)
w = namespace.NewWatcher(client.Watcher, ns)
}
return &Simple{
method: methodClient, // similar enough to this one to share it!
wg: &sync.WaitGroup{},
client: client, // store for GetClient()
kv: kv,
w: w,
}
}
// NewClientFromSimple builds a simple client from an existing client interface
// which must be a simple client. This awkward method is required so that
// namespace nesting works properly, because the *etcd.Client doesn't directly
// pass through the namespace. I'd love to nuke this function, but it's good
// enough for now.
func NewClientFromSimple(client interfaces.Client, ns string) *Simple {
if client == nil {
return &Simple{
method: methodError,
err: fmt.Errorf("client is nil"),
}
}
simple, ok := client.(*Simple)
if !ok {
return &Simple{
method: methodError,
err: fmt.Errorf("client is not simple"),
}
}
kv := simple.kv
w := simple.w
if ns != "" { // only layer if not empty
kv = namespace.NewKV(simple.kv, ns)
w = namespace.NewWatcher(simple.w, ns)
}
return &Simple{
method: methodNamespace,
wg: &sync.WaitGroup{},
client: client.GetClient(), // store for GetClient()
kv: kv,
w: w,
}
}
// NewClientFromNamespace builds a new simple client by taking an existing set
// of interface API's that we might use.
func NewClientFromNamespace(client *etcd.Client, kv etcd.KV, w etcd.Watcher) *Simple {
return &Simple{
method: methodNamespace,
wg: &sync.WaitGroup{},
client: client, // store for GetClient()
kv: kv,
w: w,
}
}
// Simple provides a simple etcd client for deploy and status operations. You
// can set Debug and Logf after you've built this with one of the NewClient*
// methods.
type Simple struct {
Debug bool
Logf func(format string, v ...interface{})
method method
wg *sync.WaitGroup
// err is the error we set when using methodError
err error
// seeds is the list of endpoints to try to connect to.
seeds []string
namespace string
// client is the etcd client connection.
client *etcd.Client
// kv and w are the namespaced interfaces that we got passed.
kv etcd.KV
w etcd.Watcher
}
// logf is a safe wrapper around the Logf parameter that doesn't panic if the
// user didn't pass a logger in.
func (obj *Simple) logf(format string, v ...interface{}) {
if obj.Logf == nil {
return
}
obj.Logf(format, v...)
}
// config returns the config struct to be used for the etcd client connect.
func (obj *Simple) config() etcd.Config {
cfg := etcd.Config{
Endpoints: obj.seeds,
// RetryDialer chooses the next endpoint to use
// it comes with a default dialer if unspecified
DialTimeout: 5 * time.Second,
}
return cfg
}
// connect connects the client to a server, and then builds the *API structs.
func (obj *Simple) connect() error {
if obj.client != nil { // memoize
return nil
}
var err error
cfg := obj.config()
obj.client, err = etcd.New(cfg) // connect!
if err != nil {
return errwrap.Wrapf(err, "client connect error")
}
obj.kv = obj.client.KV
obj.w = obj.client.Watcher
if obj.namespace != "" { // bonus feature of seeds method
obj.kv = namespace.NewKV(obj.client.KV, obj.namespace)
obj.w = namespace.NewWatcher(obj.client.Watcher, obj.namespace)
}
return nil
}
// Init starts up the struct.
func (obj *Simple) Init() error {
// By the end of this, we must have obj.kv and obj.w available for use.
switch obj.method {
case methodError:
return obj.err // use the error we set
case methodSeeds:
if len(obj.seeds) <= 0 {
return fmt.Errorf("zero seeds")
}
return obj.connect()
case methodClient:
if obj.client == nil {
return fmt.Errorf("no client")
}
if obj.kv == nil { // overwrite if not specified!
obj.kv = obj.client.KV
}
if obj.w == nil {
obj.w = obj.client.Watcher
}
return nil
case methodNamespace:
if obj.kv == nil || obj.w == nil {
return fmt.Errorf("empty namespace")
}
return nil
}
return fmt.Errorf("unknown method: %+v", obj.method)
}
// Close cleans up the struct after we're finished.
func (obj *Simple) Close() error {
defer obj.wg.Wait()
switch obj.method {
case methodError: // for consistency
return fmt.Errorf("did not Init")
case methodSeeds:
return obj.client.Close()
case methodClient:
// we were given a client, so we don't own it or close it
return nil
case methodNamespace:
return nil
}
return fmt.Errorf("unknown method: %+v", obj.method)
}
// GetClient returns a handle to an open etcd Client. This is needed for certain
// upstream API's that don't support passing in KV and Watcher instead.
func (obj *Simple) GetClient() *etcd.Client {
return obj.client
}
// Set runs a set operation. If you'd like more information about whether a
// value changed or not, use Txn instead.
func (obj *Simple) Set(ctx context.Context, key, value string, opts ...etcd.OpOption) error {
// key is the full key path
resp, err := obj.kv.Put(ctx, key, value, opts...)
if obj.Debug {
obj.logf("set(%s): %v", key, resp) // bonus
}
return err
}
// Get runs a get operation.
func (obj *Simple) Get(ctx context.Context, path string, opts ...etcd.OpOption) (map[string]string, error) {
resp, err := obj.kv.Get(ctx, path, opts...)
if err != nil {
return nil, err
}
if resp == nil {
return nil, fmt.Errorf("empty response")
}
// TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
result := make(map[string]string)
for _, x := range resp.Kvs {
result[string(x.Key)] = string(x.Value)
}
return result, nil
}
// Del runs a delete operation.
func (obj *Simple) Del(ctx context.Context, path string, opts ...etcd.OpOption) (int64, error) {
resp, err := obj.kv.Delete(ctx, path, opts...)
if err == nil {
return resp.Deleted, nil
}
return -1, err
}
// Txn runs a transaction.
func (obj *Simple) Txn(ctx context.Context, ifCmps []etcd.Cmp, thenOps, elseOps []etcd.Op) (*etcd.TxnResponse, error) {
resp, err := obj.kv.Txn(ctx).If(ifCmps...).Then(thenOps...).Else(elseOps...).Commit()
if obj.Debug {
obj.logf("txn: %v", resp) // bonus
}
return resp, err
}
// Watcher is a watcher that returns a chan of errors instead of a chan with
// all sorts of watcher data. This is useful when we only want an event signal,
// but we don't care about the specifics.
func (obj *Simple) Watcher(ctx context.Context, path string, opts ...etcd.OpOption) (chan error, error) {
cancelCtx, cancel := context.WithCancel(ctx)
info, err := obj.ComplexWatcher(cancelCtx, path, opts...)
if err != nil {
defer cancel()
return nil, err
}
ch := make(chan error)
obj.wg.Add(1) // hook in to global wait group
go func() {
defer obj.wg.Done()
defer close(ch)
defer cancel()
var data *interfaces.WatcherData
var ok bool
for {
select {
case data, ok = <-info.Events: // read
if !ok {
return
}
case <-cancelCtx.Done():
continue // wait for ch closure, but don't block
}
select {
case ch <- data.Err: // send (might be nil!)
case <-cancelCtx.Done():
continue // wait for ch closure, but don't block
}
}
}()
return ch, nil
}
// ComplexWatcher is a more capable watcher that also returns data information.
// This starts a watch request. It writes on a channel that you can follow to
// know when an event or an error occurs. It always sends one startup event. It
// will not return until the watch has been started. If it cannot start, then it
// will return an error. Remember to add the WithPrefix() option if you want to
// watch recursively.
// TODO: do we need to support retry and changed client connections?
// XXX: do we need to track last successful revision and retry from there?
// XXX: if so, use:
// lastRev := response.Header.Revision // TODO: +1 ?
// etcd.WithRev(rev)
func (obj *Simple) ComplexWatcher(ctx context.Context, path string, opts ...etcd.OpOption) (*interfaces.WatcherInfo, error) {
if obj.client == nil { // catch bugs, this often means programming error
return nil, fmt.Errorf("client is nil") // extra safety!
}
cancelCtx, cancel := context.WithCancel(ctx)
eventsChan := make(chan *interfaces.WatcherData) // channel of watcher events
var count uint8
wg := &sync.WaitGroup{}
// TODO: if we can detect the use of WithCreatedNotify, we don't need to
// hard-code it down below... https://github.com/coreos/etcd/issues/9689
// XXX: proof of concept patch: https://github.com/coreos/etcd/pull/9705
//for _, op := range opts {
// //if op.Cmp(etcd.WithCreatedNotify()) == nil { // would be best
// if etcd.OpOptionCmp(op, etcd.WithCreatedNotify()) == nil {
// count++
// wg.Add(1)
// break
// }
//}
count++
wg.Add(1)
wOpts := []etcd.OpOption{
etcd.WithCreatedNotify(),
}
wOpts = append(wOpts, opts...)
var err error
obj.wg.Add(1) // hook in to global wait group
go func() {
defer obj.wg.Done()
defer close(eventsChan)
defer cancel() // it's safe to cancel() more than once!
ch := obj.w.Watch(cancelCtx, path, wOpts...)
for {
var resp etcd.WatchResponse
var ok bool
var created bool
select {
case resp, ok = <-ch:
if !ok {
if count > 0 { // closed before startup
// set err in parent scope!
err = fmt.Errorf("watch closed")
count--
wg.Done()
}
return
}
// the watch is now running!
if count > 0 && resp.Created {
created = true
count--
wg.Done()
}
isCanceled := resp.Canceled || resp.Err() == context.Canceled
// TODO: this might not be needed
if resp.Header.Revision == 0 { // by inspection
if obj.Debug {
obj.logf("watch: received empty message") // switched client connection
}
isCanceled = true
}
if isCanceled {
data := &interfaces.WatcherData{
Err: context.Canceled,
}
select { // send the error
case eventsChan <- data:
case <-ctx.Done():
return
}
continue // channel should close shortly
}
}
// TODO: consider processing the response data into a
// more useful form for the callback...
data := &interfaces.WatcherData{
Created: created,
Path: path,
Header: resp.Header,
Events: resp.Events,
Err: resp.Err(),
}
select { // send the event
case eventsChan <- data:
case <-ctx.Done():
return
}
}
}()
wg.Wait() // wait for created event before we return
return &interfaces.WatcherInfo{
Cancel: cancel,
Events: eventsChan,
}, err
}
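
A minimal usage sketch of the Simple client above, assuming an etcd server is reachable on localhost:2379 and using a hypothetical namespace (which, as the tests further below note, must not end with a slash):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/purpleidea/mgmt/etcd/client"
)

func main() {
	c := client.NewClientFromSeedsNamespace(
		[]string{"localhost:2379"}, // endpoints
		"/_mgmt/example",           // hypothetical namespace
	)
	if err := c.Init(); err != nil {
		log.Fatalf("init error: %+v", err)
	}
	defer c.Close()

	ctx := context.Background()
	if err := c.Set(ctx, "/hello", "world"); err != nil {
		log.Fatalf("set error: %+v", err)
	}
	result, err := c.Get(ctx, "/hello")
	if err != nil {
		log.Fatalf("get error: %+v", err)
	}
	for k, v := range result {
		fmt.Printf("%s = %s\n", k, v) // keys come back relative to the namespace
	}
}
```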

View File

@@ -15,20 +15,22 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
package str
import (
"errors"
"context"
"fmt"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3"
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
)
// ErrNotExist is returned when GetStr can not find the requested key.
// TODO: https://dave.cheney.net/2016/04/07/constant-errors
var ErrNotExist = errors.New("errNotExist")
const (
ns = "" // in case we want to add one back in
)
// WatchStr returns a channel which spits out events on key activity.
// FIXME: It should close the channel when it's done, and spit out errors when
@@ -37,37 +39,23 @@ var ErrNotExist = errors.New("errNotExist")
// done, does that mean we leak go-routines since it might still be running, but
// perhaps even blocked??? Could this cause a dead-lock? Should we instead return
// some sort of struct which has a close method with it to ask for a shutdown?
func WatchStr(obj *EmbdEtcd, key string) chan error {
func WatchStr(ctx context.Context, client interfaces.Client, key string) (chan error, error) {
// new key structure is $NS/strings/$key = $data
path := fmt.Sprintf("%s/strings/%s", NS, key)
ch := make(chan error, 1)
// FIXME: fix our API so that we get a close event on shutdown.
callback := func(re *RE) error {
// TODO: is this even needed? it used to happen on conn errors
//log.Printf("Etcd: Watch: Path: %v", path) // event
if re == nil || re.response.Canceled {
return fmt.Errorf("watch is empty") // will cause a CtxError+retry
}
if len(ch) == 0 { // send event only if one isn't pending
ch <- nil // event
}
return nil
}
_, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors
return ch
path := fmt.Sprintf("%s/strings/%s", ns, key)
return client.Watcher(ctx, path)
}
// GetStr collects the string which matches a global namespace in etcd.
func GetStr(obj *EmbdEtcd, key string) (string, error) {
func GetStr(ctx context.Context, client interfaces.Client, key string) (string, error) {
// new key structure is $NS/strings/$key = $data
path := fmt.Sprintf("%s/strings/%s", NS, key)
keyMap, err := obj.Get(path, etcd.WithPrefix())
path := fmt.Sprintf("%s/strings/%s", ns, key)
keyMap, err := client.Get(ctx, path, etcd.WithPrefix())
if err != nil {
return "", errwrap.Wrapf(err, "could not get strings in: %s", key)
}
if len(keyMap) == 0 {
return "", ErrNotExist
return "", interfaces.ErrNotExist
}
if count := len(keyMap); count != 1 {
@@ -79,23 +67,21 @@ func GetStr(obj *EmbdEtcd, key string) (string, error) {
return "", fmt.Errorf("path `%s` is missing", path)
}
//log.Printf("Etcd: GetStr(%s): %s", key, val)
return val, nil
}
// SetStr sets a key and hostname pair to a certain value. If the value is
// nil, then it deletes the key. Otherwise the value should point to a string.
// TODO: TTL or delete disconnect?
func SetStr(obj *EmbdEtcd, key string, data *string) error {
func SetStr(ctx context.Context, client interfaces.Client, key string, data *string) error {
// key structure is $NS/strings/$key = $data
path := fmt.Sprintf("%s/strings/%s", NS, key)
path := fmt.Sprintf("%s/strings/%s", ns, key)
ifs := []etcd.Cmp{} // list matching the desired state
ops := []etcd.Op{} // list of ops in this transaction (then)
els := []etcd.Op{} // list of ops in this transaction (else)
if data == nil { // perform a delete
// TODO: use https://github.com/coreos/etcd/pull/7417 if merged
//ifs = append(ifs, etcd.KeyExists(path))
ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0))
ifs = append(ifs, etcdutil.KeyExists(path))
//ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0))
ops = append(ops, etcd.OpDelete(path))
} else {
data := *data // get the real value
@@ -105,6 +91,6 @@ func SetStr(obj *EmbdEtcd, key string, data *string) error {
// it's important to do this in one transaction, and atomically, because
// this way, we only generate one watch event, and only when it's needed
_, err := obj.Txn(ifs, ops, els) // TODO: do we need to look at response?
_, err := client.Txn(ctx, ifs, ops, els) // TODO: do we need to look at response?
return errwrap.Wrapf(err, "could not set strings in: %s", key)
}
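
A short sketch of the new API for these string helpers; the `etcd/str` import path is an assumption based on the new package name:

```go
package main

import (
	"context"
	"fmt"

	"github.com/purpleidea/mgmt/etcd/interfaces"
	"github.com/purpleidea/mgmt/etcd/str" // assumed package location
)

func example(ctx context.Context, client interfaces.Client) error {
	value := "hello"
	if err := str.SetStr(ctx, client, "mykey", &value); err != nil {
		return err
	}
	val, err := str.GetStr(ctx, client, "mykey")
	if err == interfaces.ErrNotExist {
		return nil // key is absent, which may be fine
	}
	if err != nil {
		return err
	}
	fmt.Printf("got: %s\n", val)
	return str.SetStr(ctx, client, "mykey", nil) // a nil value deletes the key
}
```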

View File

@@ -15,50 +15,43 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
package strmap
import (
"context"
"fmt"
"strings"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/util"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3"
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
)
const (
ns = "" // in case we want to add one back in
)
// WatchStrMap returns a channel which spits out events on key activity.
// FIXME: It should close the channel when it's done, and spit out errors when
// something goes wrong.
func WatchStrMap(obj *EmbdEtcd, key string) chan error {
func WatchStrMap(ctx context.Context, client interfaces.Client, key string) (chan error, error) {
// new key structure is $NS/strings/$key/$hostname = $data
path := fmt.Sprintf("%s/strings/%s", NS, key)
ch := make(chan error, 1)
// FIXME: fix our API so that we get a close event on shutdown.
callback := func(re *RE) error {
// TODO: is this even needed? it used to happen on conn errors
//log.Printf("Etcd: Watch: Path: %v", path) // event
if re == nil || re.response.Canceled {
return fmt.Errorf("watch is empty") // will cause a CtxError+retry
}
if len(ch) == 0 { // send event only if one isn't pending
ch <- nil // event
}
return nil
}
_, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors
return ch
path := fmt.Sprintf("%s/strings/%s", ns, key)
return client.Watcher(ctx, path, etcd.WithPrefix())
}
// GetStrMap collects all of the strings which match a namespace in etcd.
func GetStrMap(obj *EmbdEtcd, hostnameFilter []string, key string) (map[string]string, error) {
func GetStrMap(ctx context.Context, client interfaces.Client, hostnameFilter []string, key string) (map[string]string, error) {
// old key structure is $NS/strings/$hostname/$key = $data
// new key structure is $NS/strings/$key/$hostname = $data
// FIXME: if we have the $key as the last token (old key structure), we
// can allow the key to contain the slash char, otherwise we need to
// verify that one isn't present in the input string.
path := fmt.Sprintf("%s/strings/%s", NS, key)
keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
path := fmt.Sprintf("%s/strings/%s", ns, key)
keyMap, err := client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
if err != nil {
return nil, errwrap.Wrapf(err, "could not get strings in: %s", key)
}
@@ -91,16 +84,15 @@ func GetStrMap(obj *EmbdEtcd, hostnameFilter []string, key string) (map[string]s
// SetStrMap sets a key and hostname pair to a certain value. If the value is
// nil, then it deletes the key. Otherwise the value should point to a string.
// TODO: TTL or delete disconnect?
func SetStrMap(obj *EmbdEtcd, hostname, key string, data *string) error {
func SetStrMap(ctx context.Context, client interfaces.Client, hostname, key string, data *string) error {
// key structure is $NS/strings/$key/$hostname = $data
path := fmt.Sprintf("%s/strings/%s/%s", NS, key, hostname)
path := fmt.Sprintf("%s/strings/%s/%s", ns, key, hostname)
ifs := []etcd.Cmp{} // list matching the desired state
ops := []etcd.Op{} // list of ops in this transaction (then)
els := []etcd.Op{} // list of ops in this transaction (else)
if data == nil { // perform a delete
// TODO: use https://github.com/coreos/etcd/pull/7417 if merged
//ifs = append(ifs, etcd.KeyExists(path))
ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0))
ifs = append(ifs, etcdutil.KeyExists(path))
//ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0))
ops = append(ops, etcd.OpDelete(path))
} else {
data := *data // get the real value
@@ -110,6 +102,6 @@ func SetStrMap(obj *EmbdEtcd, hostname, key string, data *string) error {
// it's important to do this in one transaction, and atomically, because
// this way, we only generate one watch event, and only when it's needed
_, err := obj.Txn(ifs, ops, els) // TODO: do we need to look at response?
_, err := client.Txn(ctx, ifs, ops, els) // TODO: do we need to look at response?
return errwrap.Wrapf(err, "could not set strings in: %s", key)
}
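
The same pattern for the per-hostname map variant; the `etcd/strmap` import path is an assumption:

```go
package main

import (
	"context"

	"github.com/purpleidea/mgmt/etcd/interfaces"
	"github.com/purpleidea/mgmt/etcd/strmap" // assumed package location
)

func shareStatus(ctx context.Context, client interfaces.Client, hostname string) (map[string]string, error) {
	value := "online"
	// writes under $NS/strings/status/$hostname
	if err := strmap.SetStrMap(ctx, client, hostname, "status", &value); err != nil {
		return nil, err
	}
	// read everyone's entries, restricted to two hypothetical hosts
	return strmap.GetStrMap(ctx, client, []string{"h1", "h2"}, "status")
}
```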

49
etcd/converger.go Normal file
View File

@@ -0,0 +1,49 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
import (
"context"
"fmt"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3"
)
// setHostnameConverged sets whether a specific hostname is converged.
func (obj *EmbdEtcd) setHostnameConverged(ctx context.Context, hostname string, isConverged bool) error {
if obj.Debug {
obj.Logf("setHostnameConverged(%s): %t", hostname, isConverged)
defer obj.Logf("setHostnameConverged(%s): done!", hostname)
}
key := fmt.Sprintf(obj.NS+convergedPathFmt, hostname)
data := fmt.Sprintf("%t", isConverged)
// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
// XXX: reverse things with els to workaround the bug :(
//ifs := []etcd.Cmp{etcd.Compare(etcd.Value(key), "!=", data)} // desired state
//ops := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(obj.leaseID))}
ifs := []etcd.Cmp{etcd.Compare(etcd.Value(key), "=", data)} // desired state
ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
els := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(obj.leaseID))}
_, err := obj.client.Txn(ctx, ifs, nil, els)
return errwrap.Wrapf(err, "set hostname converged failed")
}
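
The inverted compare used above (the then-ops are empty and the put sits in the else branch) is the workaround for the linked etcd issue. A standalone sketch of the same guarded-put pattern, with hypothetical key, data and lease inputs:

```go
package main

import (
	"context"

	"github.com/purpleidea/mgmt/etcd/interfaces"

	etcd "github.com/coreos/etcd/clientv3"
)

// setIfChanged writes data to key only when the stored value differs.
func setIfChanged(ctx context.Context, client interfaces.Client, key, data string, leaseID etcd.LeaseID) error {
	ifs := []etcd.Cmp{
		etcd.Compare(etcd.Value(key), "=", data),         // already in the desired state?
		etcd.Compare(etcd.LeaseValue(key), "=", leaseID), // ...and held by our lease?
	}
	els := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(leaseID))}
	// then is nil: if the state already matches we do nothing; otherwise
	// (else) we write, generating exactly one watch event when needed.
	_, err := client.Txn(ctx, ifs, nil, els)
	return err
}
```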

View File

@@ -15,16 +15,20 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
package deployer
import (
"context"
"fmt"
"strconv"
"strings"
"sync"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3"
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
)
const (
@@ -33,34 +37,52 @@ const (
hashPath = "hash"
)
// WatchDeploy returns a channel which spits out events on new deploy activity.
// FIXME: It should close the channel when it's done, and spit out errors when
// something goes wrong.
func WatchDeploy(obj *EmbdEtcd) chan error {
// key structure is $NS/deploy/$id/payload = $data
path := fmt.Sprintf("%s/%s/", NS, deployPath)
ch := make(chan error, 1)
// FIXME: fix our API so that we get a close event on shutdown.
callback := func(re *RE) error {
// TODO: is this even needed? it used to happen on conn errors
//log.Printf("Etcd: Watch: Path: %v", path) // event
if re == nil || re.response.Canceled {
return fmt.Errorf("watch is empty") // will cause a CtxError+retry
// SimpleDeploy is a deploy struct that provides all of the needed deploy
// methods. It requires that you give it a Client interface so that it can
// perform its remote work. You must call Init before you use it, and Close when
// you are done.
type SimpleDeploy struct {
Client interfaces.Client
Debug bool
Logf func(format string, v ...interface{})
ns string // TODO: if we ever need to hardcode a base path
wg *sync.WaitGroup
}
if len(ch) == 0 { // send event only if one isn't pending
ch <- nil // event
// Init validates the deploy structure and prepares it for first use.
func (obj *SimpleDeploy) Init() error {
if obj.Client == nil {
return fmt.Errorf("the Client was not specified")
}
obj.wg = &sync.WaitGroup{}
return nil
}
_, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors
return ch
// Close cleans up after using the deploy struct and waits for any ongoing
// watches to exit before it returns.
func (obj *SimpleDeploy) Close() error {
obj.wg.Wait()
return nil
}
// WatchDeploy returns a channel which spits out events on new deploy activity.
// It closes the channel when it's done, and spits out errors when something
// goes wrong. If it can't start up, it errors immediately. The returned channel
// is buffered, so that a quick succession of events will get discarded.
func (obj *SimpleDeploy) WatchDeploy(ctx context.Context) (chan error, error) {
// key structure is $NS/deploy/$id/payload = $data
path := fmt.Sprintf("%s/%s/", obj.ns, deployPath)
// FIXME: obj.wg.Add(1) && obj.wg.Done()
return obj.Client.Watcher(ctx, path, etcd.WithPrefix())
}
// GetDeploys gets all the available deploys.
func GetDeploys(obj Client) (map[uint64]string, error) {
func (obj *SimpleDeploy) GetDeploys(ctx context.Context) (map[uint64]string, error) {
// key structure is $NS/deploy/$id/payload = $data
path := fmt.Sprintf("%s/%s/", NS, deployPath)
keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
path := fmt.Sprintf("%s/%s/", obj.ns, deployPath)
keyMap, err := obj.Client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
if err != nil {
return nil, errwrap.Wrapf(err, "could not get deploy")
}
@@ -86,7 +108,7 @@ func GetDeploys(obj Client) (map[uint64]string, error) {
}
// TODO: do some sort of filtering here?
//log.Printf("Etcd: GetDeploys(%s): Id => Data: %d => %s", key, id, val)
//obj.Logf("GetDeploys(%s): Id => Data: %d => %s", key, id, val)
result[id] = val
}
return result, nil
@@ -107,8 +129,8 @@ func calculateMax(deploys map[uint64]string) uint64 {
// an id of 0, you'll get back an empty deploy without error. This is useful so
// that you can pass through this function easily.
// FIXME: implement this more efficiently so that it doesn't have to download *all* the old deploys from etcd!
func GetDeploy(obj Client, id uint64) (string, error) {
result, err := GetDeploys(obj)
func (obj *SimpleDeploy) GetDeploy(ctx context.Context, id uint64) (string, error) {
result, err := obj.GetDeploys(ctx)
if err != nil {
return "", err
}
@@ -130,9 +152,9 @@ func GetDeploy(obj Client, id uint64) (string, error) {
// zero. You must increment the returned value by one when you add a deploy. If
// two or more clients race for this deploy id, then the loser is not committed,
// and must repeat this GetMaxDeployID process until it succeeds with a commit!
func GetMaxDeployID(obj Client) (uint64, error) {
func (obj *SimpleDeploy) GetMaxDeployID(ctx context.Context) (uint64, error) {
// TODO: this was all implemented super inefficiently, fix up for perf!
deploys, err := GetDeploys(obj) // get previous deploys
deploys, err := obj.GetDeploys(ctx) // get previous deploys
if err != nil {
return 0, errwrap.Wrapf(err, "error getting previous deploys")
}
@@ -148,29 +170,28 @@ func GetMaxDeployID(obj Client) (uint64, error) {
// contributors pushing conflicting deploys. This isn't git specific, and so any
// arbitrary string hash can be used.
// FIXME: prune old deploys from the store when they aren't needed anymore...
func AddDeploy(obj Client, id uint64, hash, pHash string, data *string) error {
func (obj *SimpleDeploy) AddDeploy(ctx context.Context, id uint64, hash, pHash string, data *string) error {
// key structure is $NS/deploy/$id/payload = $data
// key structure is $NS/deploy/$id/hash = $hash
path := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id, payloadPath)
tPath := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id, hashPath)
path := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id, payloadPath)
tPath := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id, hashPath)
ifs := []etcd.Cmp{} // list matching the desired state
ops := []etcd.Op{} // list of ops in this transaction (then)
// TODO: use https://github.com/coreos/etcd/pull/7417 if merged
// we're append only, so ensure this unique deploy id doesn't exist
ifs = append(ifs, etcd.Compare(etcd.Version(path), "=", 0)) // KeyMissing
//ifs = append(ifs, etcd.KeyMissing(path))
//ifs = append(ifs, etcd.Compare(etcd.Version(path), "=", 0)) // KeyMissing
ifs = append(ifs, etcdutil.KeyMissing(path))
// don't look for previous deploy if this is the first deploy ever
if id > 1 {
// we append sequentially, so ensure previous key *does* exist
prev := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id-1, payloadPath)
ifs = append(ifs, etcd.Compare(etcd.Version(prev), ">", 0)) // KeyExists
//ifs = append(ifs, etcd.KeyExists(prev))
prev := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id-1, payloadPath)
//ifs = append(ifs, etcd.Compare(etcd.Version(prev), ">", 0)) // KeyExists
ifs = append(ifs, etcdutil.KeyExists(prev))
if hash != "" && pHash != "" {
// does the previously stored hash match what we expect?
prevHash := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id-1, hashPath)
prevHash := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id-1, hashPath)
ifs = append(ifs, etcd.Compare(etcd.Value(prevHash), "=", pHash))
}
}
@@ -182,7 +203,7 @@ func AddDeploy(obj Client, id uint64, hash, pHash string, data *string) error {
// it's important to do this in one transaction, and atomically, because
// this way, we only generate one watch event, and only when it's needed
result, err := obj.Txn(ifs, ops, nil)
result, err := obj.Client.Txn(ctx, ifs, ops, nil)
if err != nil {
return errwrap.Wrapf(err, "error creating deploy id %d", id)
}
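
Tying GetMaxDeployID and AddDeploy together, a sketch of the race-retry loop the comments above describe, assuming an initialized SimpleDeploy, an assumed `etcd/deployer` import path, and that a failed commit surfaces as an error:

```go
package main

import (
	"context"

	"github.com/purpleidea/mgmt/etcd/deployer" // assumed package location
)

// pushDeploy appends a new deploy, retrying if another client wins the id.
func pushDeploy(ctx context.Context, simpleDeploy *deployer.SimpleDeploy, payload string) error {
	for {
		max, err := simpleDeploy.GetMaxDeployID(ctx)
		if err != nil {
			return err
		}
		// empty hashes skip the previous-hash consistency check
		if err := simpleDeploy.AddDeploy(ctx, max+1, "", "", &payload); err == nil {
			return nil // committed!
		}
		// TODO: distinguish a lost race from a hard failure before looping
	}
}
```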

View File

@@ -18,13 +18,10 @@
package etcd
import (
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
"github.com/purpleidea/mgmt/etcd/interfaces"
)
// Client provides a simple interface specification for client requests. Both
// EmbdEtcd and ClientEtcd implement this.
type Client interface {
// TODO: add more method signatures
Get(path string, opts ...etcd.OpOption) (map[string]string, error)
Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error)
}
const (
// errInconsistentApply means applyDeltaEvents wasn't consistent.
errInconsistentApply = interfaces.Error("inconsistent apply")
)

File diff suppressed because it is too large

View File

@@ -21,31 +21,19 @@ package etcd
import (
"testing"
etcdtypes "github.com/coreos/etcd/pkg/types"
)
func TestNewEmbdEtcd(t *testing.T) {
// should return a new etcd object
noServer := false
var flags Flags
obj := NewEmbdEtcd("", nil, nil, nil, nil, nil, noServer, false, 0, flags, "", nil)
if obj == nil {
t.Fatal("failed to create server object")
}
}
func TestNewEmbdEtcdConfigValidation(t *testing.T) {
// running --no-server with no --seeds specified should fail early
seeds := make(etcdtypes.URLs, 0)
noServer := true
var flags Flags
obj := NewEmbdEtcd("", seeds, nil, nil, nil, nil, noServer, false, 0, flags, "", nil)
if obj != nil {
t.Fatal("server initialization should fail on invalid configuration")
func TestValidation1(t *testing.T) {
// running --no-server with no --seeds should not validate at the moment
embdEtcd := &EmbdEtcd{
//Seeds: etcdtypes.URLs{},
NoServer: true,
}
if err := embdEtcd.Validate(); err == nil {
t.Errorf("expected validation err, got nil")
}
if err := embdEtcd.Init(); err == nil {
t.Errorf("expected init err, got nil")
defer embdEtcd.Close()
}
}

View File

@@ -1,70 +0,0 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Package event provides some primitives that are used for message passing.
package event
import (
"fmt"
)
// Resp is a channel to be used for boolean responses. A nil represents an ACK,
// and a non-nil represents a NACK (false). This also lets us use custom errors.
type Resp chan error
// NewResp is just a helper to return the right type of response channel.
func NewResp() Resp {
resp := make(chan error)
return resp
}
// ACK sends a true value to resp.
func (resp Resp) ACK() {
if resp != nil {
resp <- nil // TODO: close instead?
}
}
// NACK sends a false value to resp.
func (resp Resp) NACK() {
if resp != nil {
resp <- fmt.Errorf("NACK")
}
}
// ACKNACK sends a custom ACK or NACK. The ACK value is always nil, the NACK can
// be any non-nil error value.
func (resp Resp) ACKNACK(err error) {
if resp != nil {
resp <- err
}
}
// Wait waits for any response from a Resp channel and returns it.
func (resp Resp) Wait() error {
return <-resp
}
// ACKWait waits for a +ive Ack from a Resp channel.
func (resp Resp) ACKWait() {
for {
// wait until true value
if resp.Wait() == nil {
return
}
}
}

View File

@@ -22,7 +22,6 @@ import (
"encoding/gob"
"fmt"
"io"
"log"
"os"
"path"
"strings"
@@ -32,6 +31,7 @@ import (
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
)
func init() {
@@ -263,10 +263,8 @@ func (obj *File) Sync() error {
p := obj.path() // store file data at this path in etcd
// TODO: use https://github.com/coreos/etcd/pull/7417 if merged
cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing
//cmp := etcd.KeyMissing(p))
//cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing
cmp := etcdutil.KeyMissing(p)
op := etcd.OpPut(p, string(obj.data)) // this pushes contents to server
// it's important to do this in one transaction, and atomically, because
@@ -277,7 +275,7 @@ func (obj *File) Sync() error {
}
if !result.Succeeded {
if obj.fs.Debug {
log.Printf("debug: data already exists in storage")
obj.fs.Logf("debug: data already exists in storage")
}
}

View File

@@ -20,6 +20,7 @@ package fs
import (
"bytes"
"context"
"crypto/sha256"
"encoding/gob"
"encoding/hex"
@@ -27,19 +28,18 @@ import (
"fmt"
"hash"
"io"
"log"
"os"
"path"
"strings"
"syscall"
"time"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/spf13/afero"
context "golang.org/x/net/context"
)
func init() {
@@ -91,7 +91,7 @@ var (
// XXX: this is harder because we need the list of *all* metadata paths, if we
// want them to be able to share storage backends. (we do)
type Fs struct {
Client *etcd.Client
Client interfaces.Client
Metadata string // location of "superblock" for this filesystem
@@ -99,6 +99,7 @@ type Fs struct {
Hash string // eg: sha256
Debug bool
Logf func(format string, v ...interface{})
sb *superBlock
mounted bool
@@ -115,7 +116,7 @@ type superBlock struct {
// NewEtcdFs creates a new filesystem handle on an etcd client connection. You
// must specify the metadata string that you wish to use.
func NewEtcdFs(client *etcd.Client, metadata string) afero.Fs {
func NewEtcdFs(client interfaces.Client, metadata string) afero.Fs {
return &Fs{
Client: client,
Metadata: metadata,
@@ -127,23 +128,26 @@ func (obj *Fs) get(path string, opts ...etcd.OpOption) (map[string][]byte, error
ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout)
resp, err := obj.Client.Get(ctx, path, opts...)
cancel()
if err != nil || resp == nil {
if err != nil {
return nil, err
}
if resp == nil {
return nil, fmt.Errorf("empty response")
}
// TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
result := make(map[string][]byte) // formerly: map[string][]byte
for _, x := range resp.Kvs {
result[string(x.Key)] = x.Value // formerly: bytes.NewBuffer(x.Value).String()
// FIXME: just return resp instead if it was map[string]string?
result := make(map[string][]byte)
for key, val := range resp {
result[key] = []byte(val) // wasteful transform
}
return result, nil
}
// put a value into etcd.
func (obj *Fs) put(path string, data []byte, opts ...etcd.OpOption) error {
func (obj *Fs) set(path string, data []byte, opts ...etcd.OpOption) error {
ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout)
_, err := obj.Client.Put(ctx, path, string(data), opts...) // TODO: obj.Client.KV ?
err := obj.Client.Set(ctx, path, string(data), opts...)
cancel()
if err != nil {
switch err {
@@ -163,7 +167,7 @@ func (obj *Fs) put(path string, data []byte, opts ...etcd.OpOption) error {
// txn runs a txn in etcd.
func (obj *Fs) txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) {
ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout)
resp, err := obj.Client.Txn(ctx).If(ifcmps...).Then(thenops...).Else(elseops...).Commit()
resp, err := obj.Client.Txn(ctx, ifcmps, thenops, elseops)
cancel()
return resp, err
}
@@ -194,7 +198,7 @@ func (obj *Fs) sync() error {
return errwrap.Wrapf(err, "gob failed to encode")
}
//base64.StdEncoding.EncodeToString(b.Bytes())
return obj.put(obj.Metadata, b.Bytes())
return obj.set(obj.Metadata, b.Bytes())
}
// mount downloads the initial cache of metadata, including the *file tree.
@@ -213,7 +217,7 @@ func (obj *Fs) mount() error {
}
if result == nil || len(result) == 0 { // nothing found, create the fs
if obj.Debug {
log.Printf("debug: mount: creating new fs at: %s", obj.Metadata)
obj.Logf("mount: creating new fs at: %s", obj.Metadata)
}
// trim any trailing slashes from DataPrefix
for strings.HasSuffix(obj.DataPrefix, "/") {
@@ -248,7 +252,7 @@ func (obj *Fs) mount() error {
}
if obj.Debug {
log.Printf("debug: mount: opening old fs at: %s", obj.Metadata)
obj.Logf("mount: opening old fs at: %s", obj.Metadata)
}
sb, exists := result[obj.Metadata]
if !exists {

View File

@@ -26,7 +26,7 @@ import (
"syscall"
"testing"
"github.com/purpleidea/mgmt/etcd"
"github.com/purpleidea/mgmt/etcd/client"
etcdfs "github.com/purpleidea/mgmt/etcd/fs"
"github.com/purpleidea/mgmt/integration"
"github.com/purpleidea/mgmt/util"
@@ -41,6 +41,7 @@ import (
const (
umask = 0666
superblock = "/some/superblock" // TODO: generate randomly per test?
ns = "/_mgmt/test" // must not end with a slash!
)
// Ensure that etcdfs.Fs implements afero.Fs.
@@ -79,20 +80,26 @@ func TestFs1(t *testing.T) {
}
defer stopEtcd() // ignore the error
etcdClient := &etcd.ClientEtcd{
Seeds: []string{"localhost:2379"}, // endpoints
logf := func(format string, v ...interface{}) {
t.Logf("test: etcd: fs: "+format, v...)
}
etcdClient := client.NewClientFromSeedsNamespace(
[]string{"localhost:2379"}, // endpoints
ns,
)
if err := etcdClient.Connect(); err != nil {
if err := etcdClient.Init(); err != nil {
t.Errorf("client connection error: %+v", err)
return
}
defer etcdClient.Destroy()
defer etcdClient.Close()
etcdFs := &etcdfs.Fs{
Client: etcdClient.GetClient(),
Client: etcdClient,
Metadata: superblock,
DataPrefix: etcdfs.DefaultDataPrefix,
Logf: logf,
}
//var etcdFs afero.Fs = NewEtcdFs()
@@ -193,20 +200,26 @@ func TestFs2(t *testing.T) {
}
defer stopEtcd() // ignore the error
etcdClient := &etcd.ClientEtcd{
Seeds: []string{"localhost:2379"}, // endpoints
logf := func(format string, v ...interface{}) {
t.Logf("test: etcd: fs: "+format, v...)
}
etcdClient := client.NewClientFromSeedsNamespace(
[]string{"localhost:2379"}, // endpoints
ns,
)
if err := etcdClient.Connect(); err != nil {
if err := etcdClient.Init(); err != nil {
t.Errorf("client connection error: %+v", err)
return
}
defer etcdClient.Destroy()
defer etcdClient.Close()
etcdFs := &etcdfs.Fs{
Client: etcdClient.GetClient(),
Client: etcdClient,
Metadata: superblock,
DataPrefix: etcdfs.DefaultDataPrefix,
Logf: logf,
}
tree, err := util.FsTree(etcdFs, "/")
@@ -246,20 +259,26 @@ func TestFs3(t *testing.T) {
}
defer stopEtcd() // ignore the error
etcdClient := &etcd.ClientEtcd{
Seeds: []string{"localhost:2379"}, // endpoints
logf := func(format string, v ...interface{}) {
t.Logf("test: etcd: fs: "+format, v...)
}
etcdClient := client.NewClientFromSeedsNamespace(
[]string{"localhost:2379"}, // endpoints
ns,
)
if err := etcdClient.Connect(); err != nil {
if err := etcdClient.Init(); err != nil {
t.Errorf("client connection error: %+v", err)
return
}
defer etcdClient.Destroy()
defer etcdClient.Close()
etcdFs := &etcdfs.Fs{
Client: etcdClient.GetClient(),
Client: etcdClient,
Metadata: superblock,
DataPrefix: etcdfs.DefaultDataPrefix,
Logf: logf,
}
if err := etcdFs.Mkdir("/tmp", umask); err != nil {
@@ -371,18 +390,19 @@ func TestEtcdCopyFs0(t *testing.T) {
}
defer stopEtcd() // ignore the error
etcdClient := &etcd.ClientEtcd{
Seeds: []string{"localhost:2379"}, // endpoints
}
etcdClient := client.NewClientFromSeedsNamespace(
[]string{"localhost:2379"}, // endpoints
ns,
)
if err := etcdClient.Connect(); err != nil {
if err := etcdClient.Init(); err != nil {
t.Errorf("client connection error: %+v", err)
return
}
defer etcdClient.Destroy()
defer etcdClient.Close()
etcdFs := &etcdfs.Fs{
Client: etcdClient.GetClient(),
Client: etcdClient,
Metadata: superblock,
DataPrefix: etcdfs.DefaultDataPrefix,
}

160
etcd/helpers.go Normal file
View File

@@ -0,0 +1,160 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
import (
"context"
"fmt"
"sort"
"strings"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/util"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc/mvccpb"
etcdtypes "github.com/coreos/etcd/pkg/types" // generated package
)
// setEndpoints sets the endpoints on the etcd client if it exists. It
// prioritizes local endpoints for performance, and so that if a remote endpoint
// disconnects we aren't affected.
func (obj *EmbdEtcd) setEndpoints() {
if obj.etcd == nil { // if client doesn't exist, skip!
return
}
eps := fromURLsMapToStringList(obj.endpoints) // get flat list
sort.Strings(eps) // sort for determinism
curls, _ := obj.curls() // ignore error, was already validated
// prio sort so we connect locally first
urls := fromURLsToStringList(curls)
headFn := func(x string) bool {
return !util.StrInList(x, urls)
}
eps = util.PriorityStrSliceSort(eps, headFn)
if obj.Debug {
obj.Logf("set endpoints to: %+v", eps)
}
// trigger reconnect with new endpoint list
// XXX: When a client switches endpoints, do the watches continue from
// where they last were or do they restart? Add rev restart if needed.
obj.etcd.SetEndpoints(eps...) // no error to check
}
// ConnectBlock runs a function as soon as the client is connected, and closes
// the output channel once that function has finished. If any error occurs, it
// sends it on that channel first.
func (obj *EmbdEtcd) ConnectBlock(ctx context.Context, fn func(context.Context) error) <-chan error {
ch := make(chan error)
obj.wg.Add(1)
go func() {
defer obj.wg.Done()
defer close(ch)
select {
case <-obj.connectSignal: // the client is connected!
case <-ctx.Done():
return
}
if fn == nil {
return
}
if err := fn(ctx); err != nil {
select {
case ch <- err:
case <-ctx.Done():
}
}
}()
return ch
}
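
A sketch of how ConnectBlock might be consumed, with an arbitrary thirty second timeout; the channel closes when the callback finishes or the context expires:

```go
package etcd

import (
	"context"
	"time"
)

// waitConnected blocks until the client connects and the callback runs.
func waitConnected(obj *EmbdEtcd) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	var reterr error
	for err := range obj.ConnectBlock(ctx, func(ctx context.Context) error {
		return nil // connected; a real callback might do a sanity get here
	}) {
		reterr = err // the callback failed
	}
	return reterr // nil also covers the ctx-expired case, by inspection
}
```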
// bootstrapWatcherData returns a minimal WatcherData struct to simulate an
// initial event for bootstrapping the nominateCb before we've started up.
func bootstrapWatcherData(hostname string, urls etcdtypes.URLs) *interfaces.WatcherData {
return &interfaces.WatcherData{
Created: true, // add this flag to hint that we're bootstrapping
Header: pb.ResponseHeader{}, // not needed
Events: []*etcd.Event{
{
Type: mvccpb.PUT, // or mvccpb.DELETE
Kv: &mvccpb.KeyValue{
Key: []byte(hostname),
Value: []byte(urls.String()),
},
},
},
}
}
// applyDeltaEvents applies the WatchResponse deltas to a URLsMap and returns a
// modified copy.
func applyDeltaEvents(data *interfaces.WatcherData, urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
out, err := copyURLsMap(urlsMap)
if err != nil {
return nil, err
}
if data == nil { // passthrough
return out, nil
}
if err := data.Err; err != nil { // checked after the nil check to avoid a panic
return nil, errwrap.Wrapf(err, "data contains an error")
}
var reterr error
for _, event := range data.Events {
key := string(event.Kv.Key)
key = key[len(data.Path):] // remove path prefix
//obj.Logf("applyDeltaEvents: Event(%s): %s", event.Type.String(), key)
switch event.Type {
case etcd.EventTypePut:
val := string(event.Kv.Value)
if val == "" {
return nil, fmt.Errorf("value is empty")
}
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
if err != nil {
return nil, errwrap.Wrapf(err, "format error")
}
out[key] = urls // add to the copy
// expiry cases are seen as delete in v3 for now
//case etcd.EventTypeExpire: // doesn't exist right now
// fallthrough
case etcd.EventTypeDelete:
if _, exists := out[key]; exists {
delete(out, key)
continue
}
// this can happen if we retry an operation between a
// reconnect, so ignore in case we are reconnecting...
reterr = errInconsistentApply // key not found
// keep applying in case this is ignored
default:
return nil, fmt.Errorf("unknown event: %v", event.Type)
}
}
return out, reterr // return the modified copy, as documented
}

63
etcd/interfaces/client.go Normal file
View File

@@ -0,0 +1,63 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package interfaces
import (
"context"
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
// WatcherData is the structure of data passed to a callback from any watcher.
type WatcherData struct {
// Created is true if this event is the initial event sent on startup.
Created bool
// XXX: what goes here... this? or a more processed version?
Path string // the path we're watching
Header pb.ResponseHeader
Events []*etcd.Event
Err error
}
// WatcherInfo is what is returned from a Watcher. It contains everything you
// might need to get information about the running watch.
type WatcherInfo struct {
// Cancel must be called to shut down the Watcher when we are done with
// it. You can alternatively call cancel on the input ctx.
Cancel func()
// Events returns a channel of any events that occur. This happens on
// watch startup, watch event, and watch failure. This channel closes
// when the Watcher shuts down. If you block on these reads, then you
// will block the entire Watcher which is usually not what you want.
Events <-chan *WatcherData
}
// Client provides a simple interface specification for client requests. Both
// EmbdEtcd.MakeClient and client.Simple implement this.
type Client interface {
GetClient() *etcd.Client
Set(ctx context.Context, key, value string, opts ...etcd.OpOption) error
Get(ctx context.Context, path string, opts ...etcd.OpOption) (map[string]string, error)
Del(ctx context.Context, path string, opts ...etcd.OpOption) (int64, error)
Txn(ctx context.Context, ifCmps []etcd.Cmp, thenOps, elseOps []etcd.Op) (*etcd.TxnResponse, error)
Watcher(ctx context.Context, path string, opts ...etcd.OpOption) (chan error, error)
ComplexWatcher(ctx context.Context, path string, opts ...etcd.OpOption) (*WatcherInfo, error)
}
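
Since two implementations are expected to satisfy this interface, a compile-time assertion (a common Go idiom, sketched here for the Simple client) catches any future signature drift at build time:

```go
package client

import "github.com/purpleidea/mgmt/etcd/interfaces"

// If a method signature drifts from interfaces.Client, the build
// breaks here instead of at some distant call site.
var _ interfaces.Client = (*Simple)(nil)
```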

33
etcd/interfaces/error.go Normal file
View File

@@ -0,0 +1,33 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package interfaces
// Error is a constant error type that implements error.
type Error string
// Error fulfills the error interface of this type.
func (e Error) Error() string { return string(e) }
const (
// ErrNotExist is returned when GetStr or friends can not find the
// requested key.
ErrNotExist = Error("ErrNotExist")
// ErrShutdown is returned when we're exiting during a shutdown.
ErrShutdown = Error("ErrShutdown")
)
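
Because these sentinels are typed constants rather than vars, they compare with == and can't be reassigned at runtime; a tiny sketch:

```go
package main

import (
	"fmt"

	"github.com/purpleidea/mgmt/etcd/interfaces"
)

func main() {
	var err error = interfaces.ErrNotExist // e.g. returned from GetStr
	if err == interfaces.ErrNotExist {
		fmt.Println("key does not exist") // sentinel comparison works
	}
}
```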

314
etcd/membership.go Normal file
View File

@@ -0,0 +1,314 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
import (
"context"
"fmt"
"net/url"
"sort"
"time"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3"
rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
etcdtypes "github.com/coreos/etcd/pkg/types"
)
// addSelfState is used to populate the initial state when I am adding myself.
func (obj *EmbdEtcd) addSelfState() {
surls, _ := obj.surls() // validated on init
curls, _ := obj.curls() // validated on init
obj.membermap[obj.Hostname] = surls
obj.endpoints[obj.Hostname] = curls
obj.memberIDs[obj.Hostname] = obj.serverID
}
// addMemberState adds the specific member state to our local caches.
func (obj *EmbdEtcd) addMemberState(member string, id uint64, surls, curls etcdtypes.URLs) {
obj.stateMutex.Lock()
defer obj.stateMutex.Unlock()
if surls != nil {
obj.membermap[member] = surls
}
if curls != nil { // TODO: && len(curls) > 0 ?
obj.endpoints[member] = curls
}
obj.memberIDs[member] = id
}
// rmMemberState removes the state of a given member.
func (obj *EmbdEtcd) rmMemberState(member string) {
obj.stateMutex.Lock()
defer obj.stateMutex.Unlock()
delete(obj.membermap, member) // proactively delete it
delete(obj.endpoints, member) // proactively delete it
delete(obj.memberIDs, member) // proactively delete it
}
// updateMemberState updates some of our local state whenever we get new
// information from a response.
// TODO: ideally this would be []*etcd.Member but the types are inconsistent...
// TODO: is it worth computing a delta to see if we need to change this?
func (obj *EmbdEtcd) updateMemberState(members []*pb.Member) error {
//nominated := make(etcdtypes.URLsMap)
//volunteers := make(etcdtypes.URLsMap)
membermap := make(etcdtypes.URLsMap) // map[hostname]URLs
endpoints := make(etcdtypes.URLsMap) // map[hostname]URLs
memberIDs := make(map[string]uint64) // map[hostname]memberID
// URLs is etcdtypes.URLs is []url.URL
for _, member := range members {
// member.ID // uint64
// member.Name // string (hostname)
// member.PeerURLs // []string (URLs)
// member.ClientURLs // []string (URLs)
if member.Name == "" { // not started yet
continue
}
// []string -> etcdtypes.URLs
purls, err := etcdtypes.NewURLs(member.PeerURLs)
if err != nil {
return err
}
curls, err := etcdtypes.NewURLs(member.ClientURLs)
if err != nil {
return err
}
//nominated[member.Name] = member.PeerURLs
//volunteers[member.Name] = member.PeerURLs
membermap[member.Name] = purls
endpoints[member.Name] = curls
memberIDs[member.Name] = member.ID
}
// set
obj.stateMutex.Lock()
defer obj.stateMutex.Unlock()
// can't set these two, because we only have a partial knowledge of them
//obj.nominated = nominated // can't get this information (partial)
//obj.volunteers = volunteers // can't get this information (partial)
obj.membermap = membermap
obj.endpoints = endpoints
obj.memberIDs = memberIDs
return nil
}
// memberList returns the current list of server peer members in the cluster.
func (obj *EmbdEtcd) memberList(ctx context.Context) (*etcd.MemberListResponse, error) {
return obj.etcd.MemberList(ctx)
}
// memberAdd adds a member to the cluster.
func (obj *EmbdEtcd) memberAdd(ctx context.Context, peerURLs etcdtypes.URLs) (*etcd.MemberAddResponse, error) {
resp, err := obj.etcd.MemberAdd(ctx, peerURLs.StringSlice())
if err == rpctypes.ErrPeerURLExist { // commonly seen at startup
return nil, nil
}
if err == rpctypes.ErrMemberExist { // not seen yet, but plan for it
return nil, nil
}
return resp, err
}
// memberRemove removes a member by ID and reports whether it made a change,
// as well as any error. Both are needed because the call can run without
// error and yet find no member to remove, for example. If a value of zero is
// used, then it will try to remove itself in an idempotent way based on
// whether we're supposed to be running a server or not.
func (obj *EmbdEtcd) memberRemove(ctx context.Context, memberID uint64) (*etcd.MemberRemoveResponse, error) {
if memberID == 0 {
// copy value to avoid it changing part way through
memberID = obj.serverID
}
if memberID == 0 {
return nil, fmt.Errorf("can't remove memberID of zero")
}
resp, err := obj.etcd.MemberRemove(ctx, memberID)
if err == rpctypes.ErrMemberNotFound {
// if we get this, member already shut itself down :)
return nil, nil // unchanged, mask this error
}
return resp, err // changed
}
// memberChange polls the member list API and runs a function on each iteration.
// If that function returns nil, then it closes the output channel to signal an
// event. Between iterations, it sleeps for a given interval. Since this polls
// and doesn't watch events, it could miss changes if they happen rapidly. It
// does not send results on the channel, since results could be captured in the
// fn callback. It will send an error on the channel if something goes wrong.
// TODO: https://github.com/coreos/etcd/issues/5277
func (obj *EmbdEtcd) memberChange(ctx context.Context, fn func([]*pb.Member) error, d time.Duration) (chan error, error) {
ch := make(chan error)
go func() {
defer close(ch)
for {
resp, err := obj.etcd.MemberList(ctx)
if err != nil {
select {
case ch <- err: // send error
case <-ctx.Done():
}
return
}
result := fn(resp.Members)
if result == nil { // done!
return
}
select {
case <-time.After(d): // sleep before retry
// pass
case <-ctx.Done():
return
}
}
}()
return ch, nil
}
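// Example (hedged sketch, not part of the original): blocking until the
// cluster reaches an assumed minimum size. The callback returns nil when
// satisfied, which closes the channel; a member list error is sent on the
// channel instead.
//
//	ch, err := obj.memberChange(ctx, func(members []*pb.Member) error {
//		if len(members) >= 3 { // assumed desired size
//			return nil // done; ch gets closed
//		}
//		return fmt.Errorf("waiting for more members")
//	}, 5*time.Second)
//	if err != nil {
//		return err
//	}
//	if e, ok := <-ch; ok && e != nil {
//		return e // member list polling failed
//	}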
// memberStateFromList does a member list, and applies the state to our cache.
func (obj *EmbdEtcd) memberStateFromList(ctx context.Context) error {
resp, err := obj.memberList(ctx)
if err != nil {
return err
}
if resp == nil {
return fmt.Errorf("empty response")
}
reterr := obj.updateMemberState(resp.Members)
if reterr == nil {
obj.setEndpoints() // sync client with new endpoints
}
return reterr
}
// isLeader returns true if this server is the cluster leader, as judged from
// the first sane perspective (endpoint) that we can arbitrarily pick.
func (obj *EmbdEtcd) isLeader(ctx context.Context) (bool, error) {
if obj.server == nil {
return false, nil // if i'm not a server, i'm not a leader, return
}
var ep, backup *url.URL
if len(obj.ClientURLs) > 0 {
// heuristic, but probably correct
addresses := localhostURLs(obj.ClientURLs)
if len(addresses) > 0 {
ep = &addresses[0] // arbitrarily pick the first one
}
backup = &obj.ClientURLs[0] // backup
}
if ep == nil && len(obj.AClientURLs) > 0 {
addresses := localhostURLs(obj.AClientURLs)
if len(addresses) > 0 {
ep = &addresses[0]
}
backup = &obj.AClientURLs[0] // backup
}
if ep == nil {
ep = backup
}
if ep == nil { // programming error?
return false, fmt.Errorf("no available endpoints")
}
// Ask for one perspective...
// TODO: are we supposed to use ep.Host instead?
resp, err := obj.etcd.Maintenance.Status(ctx, ep.String()) // this perspective
if err != nil {
return false, err
}
if resp == nil {
return false, fmt.Errorf("empty response")
}
if resp.Leader != obj.serverID { // i am not the leader
return false, nil
}
return true, nil
}
// moveLeaderSomewhere tries to transfer the leader to the alphanumerically
// lowest member if the caller is the current leader. This contains races. If it
// succeeds, it returns the member hostname that it transferred to. If it can't
// transfer, but doesn't error, it returns an empty string. Any error condition
// returns an error.
func (obj *EmbdEtcd) moveLeaderSomewhere(ctx context.Context) (string, error) {
//if isLeader, err := obj.isLeader(ctx); err != nil { // race!
// return "", errwrap.Wrapf(err, "error determining leader")
//} else if !isLeader {
// if obj.Debug {
// obj.Logf("we are not the leader...")
// }
// return "", nil
//}
// assume i am the leader!
memberList, err := obj.memberList(ctx)
if err != nil {
return "", err
}
var transfereeID uint64
m := make(map[string]uint64)
names := []string{}
for _, x := range memberList.Members {
m[x.Name] = x.ID
if x.Name != obj.Hostname {
names = append(names, x.Name)
}
}
if len(names) == 0 {
return "", nil // can't transfer to self, last remaining host
}
if len(names) == 1 && names[0] == obj.Hostname { // unreachable: we filtered ourself out above
return "", nil // can't transfer to self
}
sort.Strings(names)
if len(names) > 0 {
// transfer to alphanumerically lowest ID for consistency...
transfereeID = m[names[0]]
}
if transfereeID == 0 { // safety
return "", fmt.Errorf("got memberID of zero")
}
if transfereeID == obj.serverID {
return "", nil // can't transfer to self
}
// do the move
if _, err := obj.etcd.MoveLeader(ctx, transfereeID); err == rpctypes.ErrNotLeader {
if obj.Debug {
obj.Logf("we are not the leader...")
}
return "", nil // we are not the leader
} else if err != nil {
return "", errwrap.Wrapf(err, "error moving leader")
}
return names[0], nil
}
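// Example (hedged sketch, not part of the original): attempting a graceful
// handoff before shutting down our server. The races documented above are
// tolerated here, because a non-leader caller simply gets back an empty
// string without an error.
//
//	if host, err := obj.moveLeaderSomewhere(ctx); err != nil {
//		obj.Logf("leader transfer failed: %v", err)
//	} else if host != "" {
//		obj.Logf("leadership transferred to: %s", host)
//	}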


@@ -18,394 +18,220 @@
package etcd
import (
"context"
"fmt"
"log"
"strconv"
"strings"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "github.com/coreos/etcd/clientv3"
rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
etcdtypes "github.com/coreos/etcd/pkg/types"
context "golang.org/x/net/context"
)
// TODO: Could all these Etcd*(obj *EmbdEtcd, ...) functions which deal with the
// interface between etcd paths and behaviour be grouped into a single struct ?
// Nominate nominates a particular client to be a server (peer).
func Nominate(obj *EmbdEtcd, hostname string, urls etcdtypes.URLs) error {
if obj.flags.Trace {
log.Printf("Trace: Etcd: Nominate(%v): %v", hostname, urls.String())
defer log.Printf("Trace: Etcd: Nominate(%v): Finished!", hostname)
// volunteer offers yourself up to be a server if needed. If you specify a nil
// value for urls, then this will unvolunteer yourself.
func (obj *EmbdEtcd) volunteer(ctx context.Context, urls etcdtypes.URLs) error {
if obj.Debug {
if urls == nil {
obj.Logf("unvolunteer...")
defer obj.Logf("unvolunteer: done!")
} else {
obj.Logf("volunteer: %s", urls.String())
defer obj.Logf("volunteer: done!")
}
// nominate someone to be a server
nominate := fmt.Sprintf("%s/nominated/%s", NS, hostname)
ops := []etcd.Op{} // list of ops in this txn
if urls != nil {
ops = append(ops, etcd.OpPut(nominate, urls.String())) // TODO: add a TTL? (etcd.WithLease)
} else { // delete message if set to erase
ops = append(ops, etcd.OpDelete(nominate))
}
if _, err := obj.Txn(nil, ops, nil); err != nil {
return fmt.Errorf("nominate failed") // exit in progress?
}
return nil
}
// Nominated returns a urls map of nominated etcd server volunteers.
// NOTE: I know 'nominees' might be more correct, but is less consistent here
func Nominated(obj *EmbdEtcd) (etcdtypes.URLsMap, error) {
path := fmt.Sprintf("%s/nominated/", NS)
keyMap, err := obj.Get(path, etcd.WithPrefix()) // map[string]string, bool
if err != nil {
return nil, fmt.Errorf("nominated isn't available: %v", err)
}
nominated := make(etcdtypes.URLsMap)
for key, val := range keyMap { // loop through directory of nominated
if !strings.HasPrefix(key, path) {
continue
}
name := key[len(path):] // get name of nominee
if val == "" { // skip "erased" values
continue
}
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
if err != nil {
return nil, fmt.Errorf("nominated data format error: %v", err)
}
nominated[name] = urls // add to map
if obj.flags.Debug {
log.Printf("Etcd: Nominated(%v): %v", name, val)
}
}
return nominated, nil
}
// Volunteer offers yourself up to be a server if needed.
func Volunteer(obj *EmbdEtcd, urls etcdtypes.URLs) error {
if obj.flags.Trace {
log.Printf("Trace: Etcd: Volunteer(%v): %v", obj.hostname, urls.String())
defer log.Printf("Trace: Etcd: Volunteer(%v): Finished!", obj.hostname)
}
// volunteer to be a server
volunteer := fmt.Sprintf("%s/volunteers/%s", NS, obj.hostname)
key := fmt.Sprintf(obj.NS+volunteerPathFmt, obj.Hostname)
ifs := []etcd.Cmp{} // list matching the desired state
ops := []etcd.Op{} // list of ops in this txn
els := []etcd.Op{}
if urls != nil {
// XXX: adding a TTL is crucial! (i think)
ops = append(ops, etcd.OpPut(volunteer, urls.String())) // value is usually a peer "serverURL"
data := urls.String() // value is usually a peer "serverURL"
// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
// XXX: reverse things with els to workaround the bug :(
//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
//ops = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
els = append(els, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
} else { // delete message if set to erase
ops = append(ops, etcd.OpDelete(volunteer))
ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
ops = append(ops, etcd.OpDelete(key))
}
if _, err := obj.Txn(nil, ops, nil); err != nil {
return fmt.Errorf("volunteering failed") // exit in progress?
_, err := obj.client.Txn(ctx, ifs, ops, els)
msg := "volunteering failed"
if urls == nil {
msg = "unvolunteering failed"
}
return nil
return errwrap.Wrapf(err, msg)
}
// Volunteers returns a urls map of available etcd server volunteers.
func Volunteers(obj *EmbdEtcd) (etcdtypes.URLsMap, error) {
if obj.flags.Trace {
log.Printf("Trace: Etcd: Volunteers()")
defer log.Printf("Trace: Etcd: Volunteers(): Finished!")
// nominate nominates a particular client to be a server (peer). If you specify
// a nil value for urls, then this will unnominate that member.
func (obj *EmbdEtcd) nominate(ctx context.Context, hostname string, urls etcdtypes.URLs) error {
if obj.Debug {
if urls == nil {
obj.Logf("unnominate(%s)...", hostname)
defer obj.Logf("unnominate(%s): done!", hostname)
} else {
obj.Logf("nominate(%s): %s", hostname, urls.String())
defer obj.Logf("nominate(%s): done!", hostname)
}
path := fmt.Sprintf("%s/volunteers/", NS)
keyMap, err := obj.Get(path, etcd.WithPrefix())
}
// nominate someone to be a server
key := fmt.Sprintf(obj.NS+nominatedPathFmt, hostname)
ifs := []etcd.Cmp{} // list matching the desired state
ops := []etcd.Op{} // list of ops in this txn
els := []etcd.Op{}
if urls != nil {
data := urls.String()
// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
// XXX: reverse things with els to workaround the bug :(
//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
//ops = append(ops, etcd.OpPut(key, data)) // TODO: add a TTL? (etcd.WithLease)
ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
els = append(els, etcd.OpPut(key, data)) // TODO: add a TTL? (etcd.WithLease)
} else { // delete message if set to erase
ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
ops = append(ops, etcd.OpDelete(key))
}
_, err := obj.client.Txn(ctx, ifs, ops, els)
msg := "nominate failed"
if urls == nil {
msg = "unnominate failed"
}
return errwrap.Wrapf(err, msg)
}
// advertise idempotently advertises the list of available client endpoints for
// the given member. If you specify a nil value for urls, then this will remove
// that member's advertised endpoints.
func (obj *EmbdEtcd) advertise(ctx context.Context, hostname string, urls etcdtypes.URLs) error {
if obj.Debug {
if urls == nil {
obj.Logf("unadvertise(%s)...", hostname)
defer obj.Logf("unadvertise(%s): done!", hostname)
} else {
obj.Logf("advertise(%s): %s", hostname, urls.String())
defer obj.Logf("advertise(%s): done!", hostname)
}
}
// advertise endpoints
key := fmt.Sprintf(obj.NS+endpointsPathFmt, hostname)
ifs := []etcd.Cmp{} // list matching the desired state
ops := []etcd.Op{} // list of ops in this txn
els := []etcd.Op{}
if urls != nil {
data := urls.String() // value is usually a "clientURL"
// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
// XXX: reverse things with els to workaround the bug :(
//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
//ops = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
els = append(els, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
} else { // delete in this case
ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
ops = append(ops, etcd.OpDelete(key))
}
_, err := obj.client.Txn(ctx, ifs, ops, els)
msg := "advertising failed"
if urls == nil {
msg = "unadvertising failed"
}
return errwrap.Wrapf(err, msg)
}
// getVolunteers returns a urls map of available etcd server volunteers.
func (obj *EmbdEtcd) getVolunteers(ctx context.Context) (etcdtypes.URLsMap, error) {
if obj.Debug {
obj.Logf("getVolunteers()")
defer obj.Logf("getVolunteers(): done!")
}
p := obj.NS + VolunteerPath
keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix())
if err != nil {
return nil, fmt.Errorf("volunteers aren't available: %v", err)
return nil, errwrap.Wrapf(err, "can't get peer volunteers")
}
volunteers := make(etcdtypes.URLsMap)
for key, val := range keyMap { // loop through directory of volunteers
if !strings.HasPrefix(key, path) {
if !strings.HasPrefix(key, p) {
continue
}
name := key[len(path):] // get name of volunteer
name := key[len(p):] // get name of volunteer
if val == "" { // skip "erased" values
continue
}
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
if err != nil {
return nil, fmt.Errorf("volunteers data format error: %v", err)
return nil, errwrap.Wrapf(err, "data format error")
}
volunteers[name] = urls // add to map
if obj.flags.Debug {
log.Printf("Etcd: Volunteer(%v): %v", name, val)
}
}
return volunteers, nil
}
// AdvertiseEndpoints advertises the list of available client endpoints.
func AdvertiseEndpoints(obj *EmbdEtcd, urls etcdtypes.URLs) error {
if obj.flags.Trace {
log.Printf("Trace: Etcd: AdvertiseEndpoints(%v): %v", obj.hostname, urls.String())
defer log.Printf("Trace: Etcd: AdvertiseEndpoints(%v): Finished!", obj.hostname)
// getNominated returns a urls map of nominated etcd server volunteers.
// NOTE: I know 'nominees' might be more correct, but is less consistent here
func (obj *EmbdEtcd) getNominated(ctx context.Context) (etcdtypes.URLsMap, error) {
if obj.Debug {
obj.Logf("getNominated()")
defer obj.Logf("getNominated(): done!")
}
// advertise endpoints
endpoints := fmt.Sprintf("%s/endpoints/%s", NS, obj.hostname)
ops := []etcd.Op{} // list of ops in this txn
if urls != nil {
// TODO: add a TTL? (etcd.WithLease)
ops = append(ops, etcd.OpPut(endpoints, urls.String())) // value is usually a "clientURL"
} else { // delete message if set to erase
ops = append(ops, etcd.OpDelete(endpoints))
}
if _, err := obj.Txn(nil, ops, nil); err != nil {
return fmt.Errorf("endpoint advertising failed") // exit in progress?
}
return nil
}
// Endpoints returns a urls map of available etcd server endpoints.
func Endpoints(obj *EmbdEtcd) (etcdtypes.URLsMap, error) {
if obj.flags.Trace {
log.Printf("Trace: Etcd: Endpoints()")
defer log.Printf("Trace: Etcd: Endpoints(): Finished!")
}
path := fmt.Sprintf("%s/endpoints/", NS)
keyMap, err := obj.Get(path, etcd.WithPrefix())
p := obj.NS + NominatedPath
keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix()) // map[string]string, bool
if err != nil {
return nil, fmt.Errorf("endpoints aren't available: %v", err)
return nil, errwrap.Wrapf(err, "can't get nominated peers")
}
endpoints := make(etcdtypes.URLsMap)
for key, val := range keyMap { // loop through directory of endpoints
if !strings.HasPrefix(key, path) {
nominated := make(etcdtypes.URLsMap)
for key, val := range keyMap { // loop through directory of nominated
if !strings.HasPrefix(key, p) {
continue
}
name := key[len(path):] // get name of volunteer
name := key[len(p):] // get name of nominee
if val == "" { // skip "erased" values
continue
}
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
if err != nil {
return nil, fmt.Errorf("endpoints data format error: %v", err)
return nil, errwrap.Wrapf(err, "data format error")
}
endpoints[name] = urls // add to map
if obj.flags.Debug {
log.Printf("Etcd: Endpoint(%v): %v", name, val)
nominated[name] = urls // add to map
}
}
return endpoints, nil
return nominated, nil
}
// SetHostnameConverged sets whether a specific hostname is converged.
func SetHostnameConverged(obj *EmbdEtcd, hostname string, isConverged bool) error {
if obj.flags.Trace {
log.Printf("Trace: Etcd: SetHostnameConverged(%s): %v", hostname, isConverged)
defer log.Printf("Trace: Etcd: SetHostnameConverged(%v): Finished!", hostname)
// getEndpoints returns a urls map of available endpoints for clients.
func (obj *EmbdEtcd) getEndpoints(ctx context.Context) (etcdtypes.URLsMap, error) {
if obj.Debug {
obj.Logf("getEndpoints()")
defer obj.Logf("getEndpoints(): done!")
}
converged := fmt.Sprintf("%s/converged/%s", NS, hostname)
op := []etcd.Op{etcd.OpPut(converged, fmt.Sprintf("%t", isConverged))}
if _, err := obj.Txn(nil, op, nil); err != nil { // TODO: do we need a skipConv flag here too?
return fmt.Errorf("set converged failed") // exit in progress?
}
return nil
}
// HostnameConverged returns a map of every hostname's converged state.
func HostnameConverged(obj *EmbdEtcd) (map[string]bool, error) {
if obj.flags.Trace {
log.Printf("Trace: Etcd: HostnameConverged()")
defer log.Printf("Trace: Etcd: HostnameConverged(): Finished!")
}
path := fmt.Sprintf("%s/converged/", NS)
keyMap, err := obj.ComplexGet(path, true, etcd.WithPrefix()) // don't un-converge
p := obj.NS + EndpointsPath
keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix())
if err != nil {
return nil, fmt.Errorf("converged values aren't available: %v", err)
return nil, errwrap.Wrapf(err, "can't get client endpoints")
}
converged := make(map[string]bool)
for key, val := range keyMap { // loop through directory...
if !strings.HasPrefix(key, path) {
endpoints := make(etcdtypes.URLsMap)
for key, val := range keyMap { // loop through directory of endpoints
if !strings.HasPrefix(key, p) {
continue
}
name := key[len(path):] // get name of key
name := key[len(p):] // get name of volunteer
if val == "" { // skip "erased" values
continue
}
b, err := strconv.ParseBool(val)
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
if err != nil {
return nil, fmt.Errorf("converged data format error: %v", err)
return nil, errwrap.Wrapf(err, "data format error")
}
converged[name] = b // add to map
endpoints[name] = urls // add to map
}
return converged, nil
}
// AddHostnameConvergedWatcher adds a watcher with a callback that runs on
// hostname state changes.
func AddHostnameConvergedWatcher(obj *EmbdEtcd, callbackFn func(map[string]bool) error) (func(), error) {
path := fmt.Sprintf("%s/converged/", NS)
internalCbFn := func(re *RE) error {
// TODO: get the value from the response, and apply delta...
// for now, just run a get operation which is easier to code!
m, err := HostnameConverged(obj)
if err != nil {
return err
}
return callbackFn(m) // call my function
}
return obj.AddWatcher(path, internalCbFn, true, true, etcd.WithPrefix()) // no block and no converger reset
}
// SetClusterSize sets the ideal target cluster size of etcd peers.
func SetClusterSize(obj *EmbdEtcd, value uint16) error {
if obj.flags.Trace {
log.Printf("Trace: Etcd: SetClusterSize(): %v", value)
defer log.Printf("Trace: Etcd: SetClusterSize(): Finished!")
}
key := fmt.Sprintf("%s/idealClusterSize", NS)
if err := obj.Set(key, strconv.FormatUint(uint64(value), 10)); err != nil {
return fmt.Errorf("function SetClusterSize failed: %v", err) // exit in progress?
}
return nil
}
// GetClusterSize gets the ideal target cluster size of etcd peers.
func GetClusterSize(obj *EmbdEtcd) (uint16, error) {
key := fmt.Sprintf("%s/idealClusterSize", NS)
keyMap, err := obj.Get(key)
if err != nil {
return 0, fmt.Errorf("function GetClusterSize failed: %v", err)
}
val, exists := keyMap[key]
if !exists || val == "" {
return 0, fmt.Errorf("function GetClusterSize failed: %v", err)
}
v, err := strconv.ParseUint(val, 10, 16)
if err != nil {
return 0, fmt.Errorf("function GetClusterSize failed: %v", err)
}
return uint16(v), nil
}
// MemberAdd adds a member to the cluster.
func MemberAdd(obj *EmbdEtcd, peerURLs etcdtypes.URLs) (*etcd.MemberAddResponse, error) {
//obj.Connect(false) // TODO: ?
ctx := context.Background()
var response *etcd.MemberAddResponse
var err error
for {
if obj.exiting { // the exit signal has been sent!
return nil, fmt.Errorf("exiting etcd")
}
obj.rLock.RLock()
response, err = obj.client.MemberAdd(ctx, peerURLs.StringSlice())
obj.rLock.RUnlock()
if err == nil {
break
}
if ctx, err = obj.CtxError(ctx, err); err != nil {
return nil, err
}
}
return response, nil
}
// MemberRemove removes a member by mID and returns if it worked, and also
// if there was an error. This is because it might have run without error, but
// the member wasn't found, for example.
func MemberRemove(obj *EmbdEtcd, mID uint64) (bool, error) {
//obj.Connect(false) // TODO: ?
ctx := context.Background()
for {
if obj.exiting { // the exit signal has been sent!
return false, fmt.Errorf("exiting etcd")
}
obj.rLock.RLock()
_, err := obj.client.MemberRemove(ctx, mID)
obj.rLock.RUnlock()
if err == nil {
break
} else if err == rpctypes.ErrMemberNotFound {
// if we get this, member already shut itself down :)
return false, nil
}
if ctx, err = obj.CtxError(ctx, err); err != nil {
return false, err
}
}
return true, nil
}
// Members returns information on cluster membership.
// The member ID's are the keys, because an empty names means unstarted!
// TODO: consider queueing this through the main loop with CtxError(ctx, err)
func Members(obj *EmbdEtcd) (map[uint64]string, error) {
//obj.Connect(false) // TODO: ?
ctx := context.Background()
var response *etcd.MemberListResponse
var err error
for {
if obj.exiting { // the exit signal has been sent!
return nil, fmt.Errorf("exiting etcd")
}
obj.rLock.RLock()
if obj.flags.Trace {
log.Printf("Trace: Etcd: Members(): Endpoints are: %v", obj.client.Endpoints())
}
response, err = obj.client.MemberList(ctx)
obj.rLock.RUnlock()
if err == nil {
break
}
if ctx, err = obj.CtxError(ctx, err); err != nil {
return nil, err
}
}
members := make(map[uint64]string)
for _, x := range response.Members {
members[x.ID] = x.Name // x.Name will be "" if unstarted!
}
return members, nil
}
// Leader returns the current leader of the etcd server cluster.
func Leader(obj *EmbdEtcd) (string, error) {
//obj.Connect(false) // TODO: ?
membersMap, err := Members(obj)
if err != nil {
return "", err
}
addresses := obj.LocalhostClientURLs() // heuristic, but probably correct
if len(addresses) == 0 {
// probably a programming error...
return "", fmt.Errorf("programming error")
}
endpoint := addresses[0].Host // FIXME: arbitrarily picked the first one
// part two
ctx := context.Background()
var response *etcd.StatusResponse
for {
if obj.exiting { // the exit signal has been sent!
return "", fmt.Errorf("exiting etcd")
}
obj.rLock.RLock()
response, err = obj.client.Maintenance.Status(ctx, endpoint)
obj.rLock.RUnlock()
if err == nil {
break
}
if ctx, err = obj.CtxError(ctx, err); err != nil {
return "", err
}
}
// isLeader: response.Header.MemberId == response.Leader
for id, name := range membersMap {
if id == response.Leader {
return name, nil
}
}
return "", fmt.Errorf("members map is not current") // not found
return endpoints, nil
}

etcd/server.go Normal file

@@ -0,0 +1,309 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
import (
"fmt"
"os"
"path"
"strings"
"time"
"github.com/purpleidea/mgmt/util"
"github.com/purpleidea/mgmt/util/errwrap"
"github.com/coreos/etcd/embed"
etcdtypes "github.com/coreos/etcd/pkg/types"
)
const (
// MaxServerStartTimeout is the amount of time to wait for the server
// to start before considering it a failure. If you hit this timeout,
// let us know so that we can analyze the situation, and increase this
// if necessary.
MaxServerStartTimeout = 60 * time.Second
// MaxServerCloseTimeout is the maximum amount of time we'll wait for
// the server to close down. If it exceeds this, it's probably a bug.
MaxServerCloseTimeout = 15 * time.Second
// MaxServerRetries is the maximum number of times we can try to restart
// the server if it fails on startup. This can help workaround some
// timing bugs in etcd.
MaxServerRetries = 5
// ServerRetryWait is the amount of time to wait between retries.
ServerRetryWait = 500 * time.Millisecond
)
// serverAction represents the desired server state.
type serverAction uint8
const (
serverActionStop serverAction = iota
serverActionStart
)
// serverAction returns whether we should perform the requested action, which
// is either serverActionStart or serverActionStop. For example, if we call it
// with serverActionStart and it returns true, then we should start the server.
func (obj *EmbdEtcd) serverAction(action serverAction) bool {
// check if i have actually volunteered first of all...
if obj.NoServer || len(obj.ServerURLs) == 0 {
obj.Logf("inappropriately nominated, rogue or stale server?")
return false // no action
}
_, exists := obj.nominated[obj.Hostname] // am i nominated?
// if there are no other peers, we create a new server
// TODO: do we need an || len(obj.nominated) == 0 if we're the first?
newCluster := len(obj.nominated) == 1 && exists
switch action {
case serverActionStart:
// we start if...
return obj.server == nil && (exists || newCluster)
case serverActionStop:
// we stop if...
return obj.server != nil && !exists
}
return false // no action needed
}
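// Example (hedged sketch, not part of the original): how a nominate watcher
// might consult serverAction before starting or stopping the embedded server.
// The newCluster derivation mirrors the check above; error handling and
// goroutine lifecycle management are elided for brevity.
//
//	if obj.serverAction(serverActionStart) {
//		newCluster := len(obj.nominated) == 1 // we'd be the only peer
//		go obj.runServer(newCluster, obj.nominated)
//	}
//	if obj.serverAction(serverActionStop) {
//		obj.destroyServer() // error handling elided
//	}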
// runServer kicks off a new embedded etcd server. It returns when the server
// shuts down, which can be triggered at any time by running destroyServer, or
// by some internal condition like an error.
// FIXME: should peerURLsMap just use obj.nominated instead?
func (obj *EmbdEtcd) runServer(newCluster bool, peerURLsMap etcdtypes.URLsMap) (reterr error) {
obj.Logf("server: runServer: (newCluster=%t): %+v", newCluster, peerURLsMap)
defer obj.Logf("server: runServer: done!")
//obj.serverwg.Wait() // bonus, but instead, a mutex would be race free!
obj.serverwg.Add(1)
defer obj.serverwg.Done()
defer obj.serverExitsSignal.Send()
dataDir := fmt.Sprintf("%s/", path.Join(obj.Prefix, "server"))
if err := os.MkdirAll(dataDir, 0770); err != nil {
return errwrap.Wrapf(err, "couldn't mkdir: %s", dataDir)
}
memberName := obj.Hostname
// if no peer URLs exist, then starting a server is mostly only useful for
// testing, but etcd doesn't allow the value to be empty, so we use this one!
peerURLs, err := etcdtypes.NewURLs([]string{"http://localhost:0"})
if err != nil {
return errwrap.Wrapf(err, "invalid URLs")
}
if len(obj.ServerURLs) > 0 {
peerURLs = obj.ServerURLs
}
initialPeerURLsMap, err := copyURLsMap(peerURLsMap)
if err != nil {
return errwrap.Wrapf(err, "error copying URLsMap")
}
// add self to list if it's not already in there...
if _, exists := peerURLsMap[memberName]; !exists {
initialPeerURLsMap[memberName] = peerURLs
}
// TODO: do we need to copy?
aPUrls := peerURLs
if len(obj.AServerURLs) > 0 {
aPUrls = obj.AServerURLs
}
// NOTE: this logic is similar to obj.curls()
aCUrls := obj.ClientURLs
if len(obj.AClientURLs) > 0 {
aCUrls = obj.AClientURLs
}
// embed etcd
cfg := embed.NewConfig()
cfg.Name = memberName // hostname
cfg.Dir = dataDir
cfg.LPUrls = peerURLs
cfg.LCUrls = obj.ClientURLs
cfg.APUrls = aPUrls
cfg.ACUrls = aCUrls
cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305
cfg.MaxTxnOps = DefaultMaxTxnOps
cfg.InitialCluster = initialPeerURLsMap.String() // including myself!
if newCluster {
cfg.ClusterState = embed.ClusterStateFlagNew
} else {
cfg.ClusterState = embed.ClusterStateFlagExisting
}
//cfg.ForceNewCluster = newCluster // TODO: ?
if err := cfg.Validate(); err != nil {
return errwrap.Wrapf(err, "server config is invalid")
}
obj.Logf("server: starting...")
// TODO: etcd panics with: `create wal error: no space left on device`
// see: https://github.com/etcd-io/etcd/issues/10588
defer func() {
if r := recover(); r != nil { // magic panic catcher
obj.Logf("server: panic: %s", r)
reterr = fmt.Errorf("panic during start with: %s", r) // set named return err
}
}()
// XXX: workaround: https://github.com/etcd-io/etcd/issues/10626
// This runs when we see the nominate operation. This could also error
// if this races to start up, and happens before the member add runs.
count := 0
for {
obj.server, err = embed.StartEtcd(cfg)
if err == nil {
break
}
e := err.Error()
// catch: error validating peerURLs ... member count is unequal
if strings.HasPrefix(e, "error validating peerURLs") && strings.HasSuffix(e, "member count is unequal") {
count++
if count > MaxServerRetries {
err = errwrap.Wrapf(err, "workaround retries (%d) exceeded", MaxServerRetries)
break
}
obj.Logf("waiting %s for retry", ServerRetryWait.String())
time.Sleep(ServerRetryWait)
continue
}
break
}
defer func() {
obj.server = nil // important because this is used as an isRunning flag
}()
if err != nil {
// early debug logs in case something downstream blocks
if obj.Debug {
obj.Logf("server failing with: %+v", err)
}
return errwrap.Wrapf(err, "server start failed")
}
closedChan := make(chan struct{})
defer func() {
select {
case <-time.After(MaxServerCloseTimeout):
obj.Logf("server: close timeout of %s reached", MaxServerCloseTimeout.String())
case <-closedChan:
}
}()
defer func() {
// no wg here, since we want to let it die on exit if need be...
// XXX: workaround: https://github.com/etcd-io/etcd/issues/10600
go func() {
obj.server.Close() // this blocks until server has stopped
close(closedChan) // woo!
}()
}()
defer obj.server.Server.Stop() // trigger a shutdown
select {
case <-obj.server.Server.ReadyNotify(): // we hang here if things are bad
obj.Logf("server: ready") // it didn't hang!
// TODO: should we wait for this notification elsewhere?
case <-obj.server.Server.StopNotify(): // it's going down now...
err := fmt.Errorf("received stop notification")
obj.Logf("server: stopped: %v", err)
return err
case <-time.After(MaxServerStartTimeout):
err := fmt.Errorf("start timeout of %s reached", MaxServerStartTimeout.String())
obj.Logf("server: %v", err)
return err
}
obj.serverID = uint64(obj.server.Server.ID()) // store member id for internal use
defer func() {
obj.serverID = 0 // reset
}()
obj.addSelfState() // add to endpoints list so self client can connect!
obj.setEndpoints() // sync client with new endpoints
defer obj.setEndpoints()
defer obj.rmMemberState(obj.Hostname)
obj.serverReadySignal.Send() // send a signal, and then reset the signal
for {
select {
case err, ok := <-obj.server.Err():
if !ok { // server shut down
return errwrap.Wrapf(err, "server shutdown error")
}
case <-obj.serverExit.Signal():
return errwrap.Wrapf(obj.serverExit.Error(), "server signal exit")
}
}
//return nil // unreachable
}
// destroyServer shuts down the embedded etcd server portion.
func (obj *EmbdEtcd) destroyServer() error {
// This function must be thread-safe because a destroy request will
// cause runServer to return, which then runs the defer of this function
// which is meant to clean up when an independent, normal runServer
// return happens. Add the mutex to protect against races on this call.
obj.servermu.Lock()
defer obj.servermu.Unlock()
if obj.server == nil {
return nil // don't error on redundant calls
}
obj.Logf("server: destroyServer...")
defer obj.Logf("server: destroyServer: done!")
obj.serverExit.Done(nil) // trigger an exit
obj.serverwg.Wait() // wait for server to finish shutting down
defer func() {
obj.serverExit = util.NewEasyExit() // reset
}()
return obj.serverExit.Error()
}
// ServerReady returns a channel that closes when we're up and running. This
// process happens when calling runServer. If runServer is never called, this
// will never happen. It also returns a cancel/ack function which must be called
// once the signal is received or we are done watching it. This is because this
// is a cyclical signal which happens, and then gets reset as the server starts
// up, shuts down, and repeats the cycle. The cancel/ack function ensures that
// we only watch a signal when it's ready to be read, and only reset it when we
// are done watching it.
func (obj *EmbdEtcd) ServerReady() (<-chan struct{}, func()) {
return obj.serverReadySignal.Subscribe()
}
// ServerExited returns a channel that closes when the server is destroyed. This
// process happens after runServer exits. If runServer is never called, this
// will never happen. It also returns a cancel/ack function which must be called
// once the signal is received or we are done watching it. This is because this
// is a cyclical signal which happens, and then gets reset as the server starts
// up, shuts down, and repeats the cycle. The cancel/ack function ensures that
// we only watch a signal when it's ready to be read, and only reset it when we
// are done watching it.
func (obj *EmbdEtcd) ServerExited() (<-chan struct{}, func()) {
return obj.serverExitsSignal.Subscribe()
}
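// Example (hedged sketch, not part of the original): waiting for the cyclical
// ready signal. The returned ack function must always be called once we're
// done watching, so that the signal can be reset for the next cycle.
//
//	ch, ack := obj.ServerReady()
//	select {
//	case <-ch:
//		// the server is up; a local client can now connect
//	case <-ctx.Done():
//	}
//	ack() // release the subscription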

etcd/tasks.go Normal file

@@ -0,0 +1,163 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
import (
"fmt"
"github.com/purpleidea/mgmt/util/errwrap"
)
// task represents a single task to run. These are useful for pending work that
// we want to schedule, but that shouldn't permanently error the whole system
// when it fails. In particular, idempotent tasks that are safe to re-run are
// ideal for this queue.
// The tasks can be added with queueTask.
type task struct {
name string // name of task
fn func() error // task to run
retry int // number of times to retry on error, -1 for infinite
block bool // should we block the queue until this succeeds?
report bool // should we report the error on permanent failure?
}
// String prints a string representation of the struct.
func (obj *task) String() string {
return fmt.Sprintf("task(%s)", obj.name)
}
// queueTask adds a task to the task worker queue. If you want to specify any
// properties that differ from the defaults, use queueRawTask instead.
func (obj *EmbdEtcd) queueTask(fn func() error) error {
t := &task{
fn: fn,
}
return obj.queueRawTask(t)
}
// queueRawTask adds a task of any format to the queue. You should not give
// your task a name which could match a positive integer, since those names are
// reserved for when an unnamed task is added and the system generates a name.
func (obj *EmbdEtcd) queueRawTask(t *task) error {
if obj.Debug {
obj.Logf("queueRawTask()")
defer obj.Logf("queueRawTask(): done!")
}
if t == nil {
return fmt.Errorf("nil task")
}
obj.taskQueueLock.Lock()
defer obj.taskQueueLock.Unlock()
if obj.taskQueue == nil { // killed signal
return fmt.Errorf("task queue killed")
}
if t.name == "" {
obj.taskQueueID++ // increment
t.name = fmt.Sprintf("%d", obj.taskQueueID)
}
obj.taskQueue = append(obj.taskQueue, t)
if !obj.taskQueueRunning {
obj.taskQueueRunning = true
obj.taskQueueWg.Add(1)
go obj.runTaskQueue()
}
return nil
}
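// Example (hedged sketch, not part of the original; ctx and urls are assumed
// to be in scope): queueing an idempotent task with a bounded number of
// retries. A failed, non-blocking task is re-appended to the queue until its
// retries run out, and report causes the permanent failure to be sent on
// errChan.
//
//	t := &task{
//		name:   "advertise", // assumed name, for illustration only
//		fn:     func() error { return obj.advertise(ctx, obj.Hostname, urls) },
//		retry:  3,    // retry a few times before giving up
//		report: true, // surface the permanent failure on errChan
//	}
//	if err := obj.queueRawTask(t); err != nil {
//		return err // queue was killed
//	}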
// killTaskQueue empties the task queue, causing it to shut down.
func (obj *EmbdEtcd) killTaskQueue() int {
obj.taskQueueLock.Lock()
count := len(obj.taskQueue)
obj.taskQueue = nil // clear queue
obj.taskQueueLock.Unlock()
obj.taskQueueWg.Wait() // wait for queue to exit
obj.taskQueue = []*task{} // reset
return count // number of tasks deleted
}
// runTaskQueue processes the task queue. This is started automatically by
// queueTask if needed. It will shut itself down when the queue is empty.
func (obj *EmbdEtcd) runTaskQueue() {
defer obj.taskQueueWg.Done() // added in queueTask
for {
obj.taskQueueLock.Lock()
if obj.taskQueue == nil || len(obj.taskQueue) == 0 {
defer obj.taskQueueLock.Unlock()
obj.taskQueueRunning = false
return
}
var t *task
t, obj.taskQueue = obj.taskQueue[0], obj.taskQueue[1:]
obj.taskQueueLock.Unlock()
if !t.block {
if obj.Debug {
obj.Logf("%s: run...", t)
}
err := t.fn()
if obj.Debug {
obj.Logf("%s: done: %v", t, err)
}
if err != nil {
if t.retry == 0 {
if t.report {
// send a permanent error
// XXX: guard errChan for early close... hmmm
select {
case obj.errChan <- errwrap.Wrapf(err, "task error"):
}
}
continue
}
if t.retry > 0 { // don't decrement from -1
t.retry--
}
obj.taskQueueLock.Lock()
if obj.taskQueue != nil { // killed signal
obj.taskQueue = append(obj.taskQueue, t)
}
obj.taskQueueLock.Unlock()
}
continue
}
// block
for {
if obj.Debug {
obj.Logf("%s: run...", t)
}
err := t.fn()
if obj.Debug {
obj.Logf("%s: done: %v", t, err)
}
if err == nil {
break // success!
}
if t.retry == 0 {
break // retries exhausted; blocking tasks don't report
}
if t.retry > 0 { // don't decrement from -1
t.retry--
}
}
}
}

etcd/util.go Normal file

@@ -0,0 +1,173 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package etcd
// TODO: move to sub-package if this expands in utility or is used elsewhere...
import (
"fmt"
"net/url"
"strings"
"github.com/purpleidea/mgmt/util/errwrap"
etcdtypes "github.com/coreos/etcd/pkg/types"
)
// copyURL copies a URL.
// TODO: submit this upstream to etcd ?
func copyURL(u *url.URL) (*url.URL, error) {
if u == nil {
return nil, fmt.Errorf("empty URL specified")
}
return url.Parse(u.String()) // copy it
}
// copyURLs copies a URLs.
// TODO: submit this upstream to etcd ?
func copyURLs(urls etcdtypes.URLs) (etcdtypes.URLs, error) {
out := []url.URL{}
for _, x := range urls {
u, err := copyURL(&x)
if err != nil {
return nil, err
}
out = append(out, *u)
}
return out, nil
}
// copyURLsMap copies a URLsMap.
// TODO: submit this upstream to etcd ?
func copyURLsMap(urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
out := make(etcdtypes.URLsMap)
for k, v := range urlsMap {
urls, err := copyURLs(v)
if err != nil {
return nil, err
}
out[k] = urls
}
return out, nil
}
// cmpURLs compares two URLs, and returns nil if they are the same.
func cmpURLs(u1, u2 etcdtypes.URLs) error {
if (u1 == nil) != (u2 == nil) { // xor
return fmt.Errorf("lists differ")
}
if len(u1) != len(u2) {
return fmt.Errorf("length of lists is not the same")
}
for i, v1 := range u1 {
if v1 != u2[i] {
return fmt.Errorf("index %d differs", i)
}
}
return nil
}
// cmpURLsMap compares two URLsMap's, and returns nil if they are the same.
func cmpURLsMap(m1, m2 etcdtypes.URLsMap) error {
if (m1 == nil) != (m2 == nil) { // xor
return fmt.Errorf("maps differ")
}
if len(m1) != len(m2) {
return fmt.Errorf("length of maps is not the same")
}
for k, v1 := range m1 {
v2, exists := m2[k]
if !exists {
return fmt.Errorf("key `%s` not found in map 2", k)
}
if err := cmpURLs(v1, v2); err != nil {
return errwrap.Wrapf(err, "values at key `%s` differ", k)
}
}
return nil
}
// newURLsMap is a helper to build a new URLsMap without having to import the
// messy etcdtypes package.
func newURLsMap() etcdtypes.URLsMap {
return make(etcdtypes.URLsMap)
}
func fromURLsToStringList(urls etcdtypes.URLs) []string {
result := []string{}
for _, u := range urls { // flatten map
result = append(result, u.String()) // use full url including scheme
}
return result
}
// fromURLsMapToStringList flattens a map of URLs into a single string list.
// Remember to sort the result if you want it to be deterministic!
func fromURLsMapToStringList(m etcdtypes.URLsMap) []string {
result := []string{}
for _, x := range m { // flatten map
for _, u := range x {
result = append(result, u.String()) // use full url including scheme
}
}
return result
}
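// Example (hedged sketch, not part of the original): building a deterministic
// endpoint list for the client from our endpoints map. SetEndpoints is the
// standard clientv3 call; obj.etcd is assumed to be the client handle.
//
//	eps := fromURLsMapToStringList(obj.endpoints)
//	sort.Strings(eps) // sort, since map iteration order is random
//	obj.etcd.SetEndpoints(eps...)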
// validateURLsMap checks if each embedded URL is parseable correctly.
//func validateURLsMap(urlsMap etcdtypes.URLsMap) error {
// _, err := copyURLsMap(urlsMap) // would fail if anything didn't parse
// return err
//}
// localhostURLs returns the most localhost like URLs for direct connection.
// This gets clients to talk to the local servers first before looking remotely.
// TODO: improve this algorithm as it's currently a bad heuristic
func localhostURLs(urls etcdtypes.URLs) etcdtypes.URLs {
out := etcdtypes.URLs{}
for _, u := range urls {
// "localhost" or anything in 127.0.0.0/8 is valid!
if strings.HasPrefix(u.Host, "localhost") || strings.HasPrefix(u.Host, "127.") {
out = append(out, u)
continue
}
// or ipv6 localhost
// TODO: are there others to add here?
if strings.HasPrefix(u.Host, "[::1]") {
out = append(out, u)
continue
}
// or local unix domain sockets
if u.Scheme == "unix" {
out = append(out, u)
continue
}
}
return out
}
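// Example (hedged sketch, not part of the original): preferring a local
// endpoint when one exists, else falling back to the full list.
//
//	urls, _ := etcdtypes.NewURLs([]string{
//		"http://192.168.13.42:2379",
//		"http://127.0.0.1:2379",
//	})
//	if local := localhostURLs(urls); len(local) > 0 {
//		urls = local // only the 127.0.0.1 entry remains
//	}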
//func urlRemoveScheme(urls etcdtypes.URLs) []string {
// strs := []string{}
// for _, u := range urls {
// strs = append(strs, u.Host) // remove http:// prefix
// }
// return strs
//}

etcd/util_test.go Normal file

@@ -0,0 +1,189 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// +build !root
package etcd
import (
"net/url"
"testing"
)
func TestCopyURL0(t *testing.T) {
// list of urls to test
strs := []string{
"",
"http://192.168.13.42:2379",
"https://192.168.13.42:2380",
"http://192.168.13.42",
"https://192.168.13.42",
}
for _, str := range strs {
t.Logf("testing: `%s`", str)
u1, err := url.Parse(str)
if err != nil {
t.Errorf("url did not parse: %+v", err)
continue
}
u2, err := copyURL(u1)
if err != nil {
t.Errorf("url did not copy: %+v", err)
continue
}
if s := u2.String(); s != str {
t.Errorf("url did not cmp, got: `%s`, expected: `%s`", s, str)
}
// bonus test (add to separate lists of size one)
if err := cmpURLs([]url.URL{*u1}, []url.URL{*u2}); err != nil {
t.Errorf("urls did not cmp, err: %+v", err)
}
}
}
func TestCopyURLs0(t *testing.T) {
// list of urls lists to test
nstrs := [][]string{
{}, // empty!
{
"http://192.168.13.42:2379",
"https://192.168.13.42:2380",
"http://192.168.13.42",
"https://192.168.13.42",
},
{
"http://192.168.42.42:2379",
"https://192.168.13.42:2380",
"http://192.168.99.42",
"https://10.10.1.255",
},
{
"http://example.com:2379",
"https://purpleidea.com/:2379",
"http://192.168.13.42",
"https://192.168.13.42",
},
}
for _, strs := range nstrs {
t.Logf("testing: `%s`", strs)
urls1 := []url.URL{}
for _, str := range strs {
u, err := url.Parse(str)
if err != nil {
t.Errorf("url did not parse: %+v", err)
continue
}
urls1 = append(urls1, *u)
}
urls2, err := copyURLs(urls1)
if err != nil {
t.Errorf("urls did not copy: %+v", err)
continue
}
if err := cmpURLs(urls1, urls2); err != nil {
t.Errorf("urls did not cmp, err: %+v", err)
}
}
}
func TestCopyURLsMap0(t *testing.T) {
// list of urls lists to test
nmstrs := []map[string][]string{
{}, // empty!
{
"h1": []string{}, // empty
"h2": []string{}, // empty
"h3": []string{}, // empty
},
{
"h1": []string{}, // empty
"h2": nil, // nil !
"h3": []string{}, // empty
},
{
"h1": []string{}, // empty
"h2": []string{
"http://example.com:2379",
"https://purpleidea.com/:2379",
"http://192.168.13.42",
"https://192.168.13.42",
},
},
{
"h1": []string{
"http://192.168.13.42:2379",
"https://192.168.13.42:2380",
"http://192.168.13.42",
"https://192.168.13.42",
},
"h2": []string{
"http://example.com:2379",
"https://purpleidea.com/:2379",
"http://192.168.13.42",
"https://192.168.13.42",
},
},
{
"h1": []string{
"http://192.168.13.42:2379",
"https://192.168.13.42:2380",
"http://192.168.13.42",
"https://192.168.13.42",
},
"h2": nil, // nil !
"h3": []string{
"http://example.com:2379",
"https://purpleidea.com/:2379",
"http://192.168.13.42",
"https://192.168.13.42",
},
},
}
for _, mstrs := range nmstrs {
t.Logf("testing: `%s`", mstrs)
urlsMap1 := newURLsMap()
for key, strs := range mstrs {
urls := []url.URL{}
for _, str := range strs {
u, err := url.Parse(str)
if err != nil {
t.Errorf("url did not parse: %+v", err)
continue
}
urls = append(urls, *u)
}
urlsMap1[key] = urls
}
urlsMap2, err := copyURLsMap(urlsMap1)
if err != nil {
t.Errorf("urlsMap did not copy: %+v", err)
continue
}
if err := cmpURLsMap(urlsMap1, urlsMap2); err != nil {
t.Errorf("urlsMap did not cmp, err: %+v", err)
}
}
}


@@ -18,19 +18,27 @@
package etcd
import (
"context"
"fmt"
"net/url"
"strings"
"github.com/purpleidea/mgmt/engine"
"github.com/purpleidea/mgmt/etcd/chooser"
"github.com/purpleidea/mgmt/etcd/client"
"github.com/purpleidea/mgmt/etcd/client/resources"
"github.com/purpleidea/mgmt/etcd/client/str"
"github.com/purpleidea/mgmt/etcd/client/strmap"
etcdfs "github.com/purpleidea/mgmt/etcd/fs"
"github.com/purpleidea/mgmt/etcd/interfaces"
"github.com/purpleidea/mgmt/etcd/scheduler"
"github.com/purpleidea/mgmt/util"
)
// World is an etcd backed implementation of the World interface.
type World struct {
Hostname string // uuid for the consumer of these
EmbdEtcd *EmbdEtcd
Client interfaces.Client
MetadataPrefix string // expected metadata prefix
StoragePrefix string // storage prefix for etcdfs storage
StandaloneFs engine.Fs // store an fs here for local usage
@@ -40,72 +48,113 @@ type World struct {
// ResWatch returns a channel which spits out events on possible exported
// resource changes.
func (obj *World) ResWatch() chan error {
return WatchResources(obj.EmbdEtcd)
func (obj *World) ResWatch(ctx context.Context) (chan error, error) {
return resources.WatchResources(ctx, obj.Client)
}
// ResExport exports a list of resources under our hostname namespace.
// Subsequent calls replace the previously set collection atomically.
func (obj *World) ResExport(resourceList []engine.Res) error {
return SetResources(obj.EmbdEtcd, obj.Hostname, resourceList)
func (obj *World) ResExport(ctx context.Context, resourceList []engine.Res) error {
return resources.SetResources(ctx, obj.Client, obj.Hostname, resourceList)
}
// ResCollect gets the collection of exported resources which match the filter.
// It does this atomically so that a call always returns a complete collection.
func (obj *World) ResCollect(hostnameFilter, kindFilter []string) ([]engine.Res, error) {
func (obj *World) ResCollect(ctx context.Context, hostnameFilter, kindFilter []string) ([]engine.Res, error) {
// XXX: should we be restricted to retrieving resources that were
// exported with a tag that allows or restricts our hostname? We could
// enforce that here if the underlying API supported it... Add this?
return GetResources(obj.EmbdEtcd, hostnameFilter, kindFilter)
return resources.GetResources(ctx, obj.Client, hostnameFilter, kindFilter)
}
// IdealClusterSizeWatch returns a stream of errors anytime the cluster-wide
// dynamic cluster size setpoint changes.
func (obj *World) IdealClusterSizeWatch(ctx context.Context) (chan error, error) {
c := client.NewClientFromSimple(obj.Client, ChooserPath)
if err := c.Init(); err != nil {
return nil, err
}
util.WgFromCtx(ctx).Add(1)
go func() {
defer util.WgFromCtx(ctx).Done()
// This must get closed *after* because it will not finish until
// the Watcher returns, because it contains a wg.Wait() in it...
defer c.Close() // ignore error
select {
case <-ctx.Done():
}
}()
return c.Watcher(ctx, chooser.IdealDynamicSizePath)
}
// IdealClusterSizeGet gets the cluster-wide dynamic cluster size setpoint.
func (obj *World) IdealClusterSizeGet(ctx context.Context) (uint16, error) {
c := client.NewClientFromSimple(obj.Client, ChooserPath)
if err := c.Init(); err != nil {
return 0, err
}
defer c.Close() // ignore error
return chooser.DynamicSizeGet(ctx, c) // use client with added namespace
}
// IdealClusterSizeSet sets the cluster-wide dynamic cluster size setpoint.
func (obj *World) IdealClusterSizeSet(ctx context.Context, size uint16) (bool, error) {
c := client.NewClientFromSimple(obj.Client, ChooserPath)
if err := c.Init(); err != nil {
return false, err
}
defer c.Close() // ignore error
return chooser.DynamicSizeSet(ctx, c, size)
}
// StrWatch returns a channel which spits out events on possible string changes.
func (obj *World) StrWatch(namespace string) chan error {
return WatchStr(obj.EmbdEtcd, namespace)
func (obj *World) StrWatch(ctx context.Context, namespace string) (chan error, error) {
return str.WatchStr(ctx, obj.Client, namespace)
}
// StrIsNotExist returns whether the error from StrGet is a key missing error.
func (obj *World) StrIsNotExist(err error) bool {
return err == ErrNotExist
return err == interfaces.ErrNotExist
}
// StrGet returns the value for the the given namespace.
func (obj *World) StrGet(namespace string) (string, error) {
return GetStr(obj.EmbdEtcd, namespace)
func (obj *World) StrGet(ctx context.Context, namespace string) (string, error) {
return str.GetStr(ctx, obj.Client, namespace)
}
// StrSet sets the namespace value to a particular string.
func (obj *World) StrSet(namespace, value string) error {
return SetStr(obj.EmbdEtcd, namespace, &value)
func (obj *World) StrSet(ctx context.Context, namespace, value string) error {
return str.SetStr(ctx, obj.Client, namespace, &value)
}
// StrDel deletes the value in a particular namespace.
func (obj *World) StrDel(namespace string) error {
return SetStr(obj.EmbdEtcd, namespace, nil)
func (obj *World) StrDel(ctx context.Context, namespace string) error {
return str.SetStr(ctx, obj.Client, namespace, nil)
}
// StrMapWatch returns a channel which spits out events on possible string changes.
func (obj *World) StrMapWatch(namespace string) chan error {
return WatchStrMap(obj.EmbdEtcd, namespace)
func (obj *World) StrMapWatch(ctx context.Context, namespace string) (chan error, error) {
return strmap.WatchStrMap(ctx, obj.Client, namespace)
}
// StrMapGet returns a map of hostnames to values in the given namespace.
func (obj *World) StrMapGet(namespace string) (map[string]string, error) {
return GetStrMap(obj.EmbdEtcd, []string{}, namespace)
func (obj *World) StrMapGet(ctx context.Context, namespace string) (map[string]string, error) {
return strmap.GetStrMap(ctx, obj.Client, []string{}, namespace)
}
// StrMapSet sets the namespace value to a particular string under the identity
// of its own hostname.
func (obj *World) StrMapSet(namespace, value string) error {
return SetStrMap(obj.EmbdEtcd, obj.Hostname, namespace, &value)
func (obj *World) StrMapSet(ctx context.Context, namespace, value string) error {
return strmap.SetStrMap(ctx, obj.Client, obj.Hostname, namespace, &value)
}
// StrMapDel deletes the value in a particular namespace.
func (obj *World) StrMapDel(namespace string) error {
return SetStrMap(obj.EmbdEtcd, obj.Hostname, namespace, nil)
func (obj *World) StrMapDel(ctx context.Context, namespace string) error {
return strmap.SetStrMap(ctx, obj.Client, obj.Hostname, namespace, nil)
}
// Scheduler returns a scheduling result of hosts in a particular namespace.
// XXX: Add a context.Context here
func (obj *World) Scheduler(namespace string, opts ...scheduler.Option) (*scheduler.Result, error) {
modifiedOpts := []scheduler.Option{}
for _, o := range opts {
@@ -115,7 +164,8 @@ func (obj *World) Scheduler(namespace string, opts ...scheduler.Option) (*schedu
modifiedOpts = append(modifiedOpts, scheduler.Debug(obj.Debug))
modifiedOpts = append(modifiedOpts, scheduler.Logf(obj.Logf))
return scheduler.Schedule(obj.EmbdEtcd.GetClient(), fmt.Sprintf("%s/scheduler/%s", NS, namespace), obj.Hostname, modifiedOpts...)
path := fmt.Sprintf(schedulerPathFmt, namespace)
return scheduler.Schedule(obj.Client.GetClient(), path, obj.Hostname, modifiedOpts...)
}
// Fs returns a distributed file system from a unique URI. For single host
@@ -144,9 +194,14 @@ func (obj *World) Fs(uri string) (engine.Fs, error) {
}
etcdFs := &etcdfs.Fs{
Client: obj.EmbdEtcd.GetClient(),
Client: obj.Client, // TODO: do we need to add a namespace?
Metadata: u.Path,
DataPrefix: obj.StoragePrefix,
Debug: obj.Debug,
Logf: func(format string, v ...interface{}) {
obj.Logf("fs: "+format, v...)
},
}
return etcdFs, nil
}


@@ -0,0 +1,4 @@
# sets a cluster parameter, safe to be called identically from multiple machines
config:etcd "whatever" {
idealclustersize => 7,
}


@@ -1,9 +1,10 @@
# run this example with these commands
# watch -n 0.1 'tail *' # run this in /tmp/mgmt/
# time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
# time ./mgmt run --hostname h1 --tmp-prefix --no-pgp empty
# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty
# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty
# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty
# time ./mgmt deploy --no-git --seeds http://127.0.0.1:2379 lang --lang examples/lang/exchange0.mcl
import "sys"
import "world"


@@ -18,6 +18,7 @@
package coreworld
import (
"context"
"fmt"
"github.com/purpleidea/mgmt/lang/funcs"
@@ -75,6 +76,8 @@ func (obj *ExchangeFunc) Init(init *interfaces.Init) error {
// Stream returns the changing values that this func has over time.
func (obj *ExchangeFunc) Stream() error {
defer close(obj.init.Output) // the sender closes
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
for {
select {
// TODO: should this first chan be run as a priority channel to
@@ -106,7 +109,12 @@ func (obj *ExchangeFunc) Stream() error {
// TODO: possibly removing our stored value there first!
if obj.namespace == "" {
obj.namespace = namespace // store it
obj.watchChan = obj.init.World.StrMapWatch(obj.namespace) // watch for var changes
var err error
obj.watchChan, err = obj.init.World.StrMapWatch(ctx, obj.namespace) // watch for var changes
if err != nil {
return err
}
} else if obj.namespace != namespace {
return fmt.Errorf("can't change namespace, previously: `%s`", obj.namespace)
}
@@ -116,7 +124,7 @@ func (obj *ExchangeFunc) Stream() error {
obj.init.Logf("value: %+v", value)
}
if err := obj.init.World.StrMapSet(obj.namespace, value); err != nil {
if err := obj.init.World.StrMapSet(ctx, obj.namespace, value); err != nil {
return errwrap.Wrapf(err, "namespace write error of `%s` to `%s`", value, obj.namespace)
}
@@ -134,7 +142,7 @@ func (obj *ExchangeFunc) Stream() error {
return errwrap.Wrapf(err, "channel watch failed on `%s`", obj.namespace)
}
keyMap, err := obj.init.World.StrMapGet(obj.namespace)
keyMap, err := obj.init.World.StrMapGet(ctx, obj.namespace)
if err != nil {
return errwrap.Wrapf(err, "channel read failed on `%s`", obj.namespace)
}


@@ -18,6 +18,7 @@
package coreworld
import (
"context"
"fmt"
"github.com/purpleidea/mgmt/lang/funcs"
@@ -73,6 +74,8 @@ func (obj *KVLookupFunc) Init(init *interfaces.Init) error {
// Stream returns the changing values that this func has over time.
func (obj *KVLookupFunc) Stream() error {
defer close(obj.init.Output) // the sender closes
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
for {
select {
// TODO: should this first chan be run as a priority channel to
@@ -104,9 +107,13 @@ func (obj *KVLookupFunc) Stream() error {
// TODO: possibly removing our stored value there first!
if obj.namespace == "" {
obj.namespace = namespace // store it
obj.watchChan = obj.init.World.StrMapWatch(obj.namespace) // watch for var changes
var err error
obj.watchChan, err = obj.init.World.StrMapWatch(ctx, obj.namespace) // watch for var changes
if err != nil {
return err
}
result, err := obj.buildMap() // build the map...
result, err := obj.buildMap(ctx) // build the map...
if err != nil {
return err
}
@@ -135,7 +142,7 @@ func (obj *KVLookupFunc) Stream() error {
return errwrap.Wrapf(err, "channel watch failed on `%s`", obj.namespace)
}
result, err := obj.buildMap() // build the map...
result, err := obj.buildMap(ctx) // build the map...
if err != nil {
return err
}
@@ -166,8 +173,8 @@ func (obj *KVLookupFunc) Close() error {
}
// buildMap builds the result map which we'll need. It uses struct variables.
func (obj *KVLookupFunc) buildMap() (types.Value, error) {
keyMap, err := obj.init.World.StrMapGet(obj.namespace)
func (obj *KVLookupFunc) buildMap(ctx context.Context) (types.Value, error) {
keyMap, err := obj.init.World.StrMapGet(ctx, obj.namespace)
if err != nil {
return nil, errwrap.Wrapf(err, "channel read failed on `%s`", obj.namespace)
}

View File

@@ -16,7 +16,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// test with:
// time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl
// time ./mgmt run --hostname h1 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl
// time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl
// time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl
// kill h2 (should see h1 and h3 pick [h1, h3] instead)

View File

@@ -18,11 +18,13 @@
package lib
import (
"context"
"fmt"
"log"
"os"
"github.com/purpleidea/mgmt/etcd"
"github.com/purpleidea/mgmt/etcd/client"
"github.com/purpleidea/mgmt/etcd/deployer"
etcdfs "github.com/purpleidea/mgmt/etcd/fs"
"github.com/purpleidea/mgmt/gapi"
"github.com/purpleidea/mgmt/util/errwrap"
@@ -34,12 +36,13 @@ import (
const (
// MetadataPrefix is the etcd prefix where all our fs superblocks live.
MetadataPrefix = etcd.NS + "/fs"
MetadataPrefix = "/fs"
// StoragePrefix is the etcd prefix where all our fs data lives.
StoragePrefix = etcd.NS + "/storage"
StoragePrefix = "/storage"
)
// deploy is the cli target to manage deploys to our cluster.
// TODO: add a timeout and/or cancel signal to replace context.TODO()
func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
cliContext := c.Parent()
if cliContext == nil {
@@ -55,7 +58,12 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
debug = flags.Debug
}
}
Logf := func(format string, v ...interface{}) {
log.Printf("deploy: "+format, v...)
}
hello(program, version, flags) // say hello!
defer Logf("goodbye!")
var hash, pHash string
if !cliContext.Bool("no-git") {
@@ -74,7 +82,7 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
}
hash = head.Hash().String() // current commit id
log.Printf("deploy: hash: %s", hash)
Logf("hash: %s", hash)
lo := &git.LogOptions{
From: head.Hash(),
@@ -90,7 +98,7 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
if err == nil { // errors are okay, we might be empty
pHash = commit.Hash.String() // previous commit id
}
log.Printf("deploy: previous deploy hash: %s", pHash)
Logf("previous deploy hash: %s", pHash)
if cliContext.Bool("force") {
pHash = "" // don't check this :(
}
@@ -101,28 +109,58 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
uniqueid := uuid.New() // panics if it can't generate one :P
etcdClient := &etcd.ClientEtcd{
Seeds: cliContext.StringSlice("seeds"), // endpoints
etcdClient := client.NewClientFromSeedsNamespace(
cliContext.StringSlice("seeds"), // endpoints
NS,
)
if err := etcdClient.Init(); err != nil {
return errwrap.Wrapf(err, "client Init failed")
}
if err := etcdClient.Connect(); err != nil {
return errwrap.Wrapf(err, "client connection error")
defer func() {
err := errwrap.Wrapf(etcdClient.Close(), "client Close failed")
if err != nil {
// TODO: cause the final exit code to be non-zero
Logf("client cleanup error: %+v", err)
}
defer etcdClient.Destroy()
}()
simpleDeploy := &deployer.SimpleDeploy{
Client: etcdClient,
Debug: debug,
Logf: func(format string, v ...interface{}) {
Logf("deploy: "+format, v...)
},
}
if err := simpleDeploy.Init(); err != nil {
return errwrap.Wrapf(err, "deploy Init failed")
}
defer func() {
err := errwrap.Wrapf(simpleDeploy.Close(), "deploy Close failed")
if err != nil {
// TODO: cause the final exit code to be non-zero
Logf("deploy cleanup error: %+v", err)
}
}()
// get max id (from all the previous deploys)
max, err := etcd.GetMaxDeployID(etcdClient)
max, err := simpleDeploy.GetMaxDeployID(context.TODO())
if err != nil {
return errwrap.Wrapf(err, "error getting max deploy id")
}
// find the latest id
var id = max + 1 // next id
log.Printf("deploy: max deploy id: %d", max)
Logf("previous max deploy id: %d", max)
etcdFs := &etcdfs.Fs{
Client: etcdClient.GetClient(),
Client: etcdClient,
// TODO: using a uuid is meant as a temporary measure, i hate them
Metadata: MetadataPrefix + fmt.Sprintf("/deploy/%d-%s", id, uniqueid),
DataPrefix: StoragePrefix,
Debug: debug,
Logf: func(format string, v ...interface{}) {
Logf("fs: "+format, v...)
},
}
cliInfo := &gapi.CliInfo{
@@ -154,9 +192,9 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
}
// this nominally checks the previous git hash matches our expectation
if err := etcd.AddDeploy(etcdClient, id, hash, pHash, &str); err != nil {
if err := simpleDeploy.AddDeploy(context.TODO(), id, hash, pHash, &str); err != nil {
return errwrap.Wrapf(err, "could not create deploy id `%d`", id)
}
log.Printf("deploy: success, id: %d", id)
Logf("success, id: %d", id)
return nil
}
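
Putting the rewritten target together: the client and deployer lifecycle above
reduces to roughly the following sketch, under the signatures visible in this
hunk. Errors from the deferred Close calls are dropped here for brevity, which
the real code above deliberately avoids.

```
// Sketch of the new deploy flow, condensed from the hunk above.
package example

import (
	"context"

	"github.com/purpleidea/mgmt/etcd/client"
	"github.com/purpleidea/mgmt/etcd/deployer"
	"github.com/purpleidea/mgmt/util/errwrap"
)

const ns = "/_mgmt" // root namespace, must not end with a slash!

func pushDeploy(ctx context.Context, seeds []string, hash, pHash, payload string) error {
	etcdClient := client.NewClientFromSeedsNamespace(seeds, ns)
	if err := etcdClient.Init(); err != nil {
		return errwrap.Wrapf(err, "client Init failed")
	}
	defer etcdClient.Close() // the real code logs this error instead of dropping it

	simpleDeploy := &deployer.SimpleDeploy{
		Client: etcdClient, // Debug/Logf omitted in this sketch
	}
	if err := simpleDeploy.Init(); err != nil {
		return errwrap.Wrapf(err, "deploy Init failed")
	}
	defer simpleDeploy.Close()

	max, err := simpleDeploy.GetMaxDeployID(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "error getting max deploy id")
	}
	// ids are sequential; pHash lets the server reject a stale deploy
	return simpleDeploy.AddDeploy(ctx, max+1, hash, pHash, &payload)
}
```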

View File

@@ -37,6 +37,7 @@ func hello(program, version string, flags Flags) {
log.SetFlags(logFlags)
// un-hijack from capnslog...
// XXX: move this to the etcd package when new version deprecates capnslog
log.SetOutput(os.Stderr)
if flags.Verbose {
capnslog.SetFormatter(capnslog.NewLogFormatter(os.Stderr, "(etcd) ", logFlags))

View File

@@ -18,6 +18,7 @@
package lib
import (
"context"
"fmt"
"io/ioutil"
"log"
@@ -33,6 +34,8 @@ import (
"github.com/purpleidea/mgmt/engine/graph/autogroup"
_ "github.com/purpleidea/mgmt/engine/resources" // let register's run
"github.com/purpleidea/mgmt/etcd"
"github.com/purpleidea/mgmt/etcd/chooser"
"github.com/purpleidea/mgmt/etcd/deployer"
"github.com/purpleidea/mgmt/gapi"
"github.com/purpleidea/mgmt/gapi/empty"
"github.com/purpleidea/mgmt/pgp"
@@ -44,10 +47,14 @@ import (
etcdtypes "github.com/coreos/etcd/pkg/types"
)
const (
// NS is the root namespace for etcd operations. All keys must use it!
NS = "/_mgmt" // must not end with a slash!
)
// Flags are some constant flags which are used throughout the program.
type Flags struct {
Debug bool // add additional log messages
Trace bool // add execution flow log messages
Verbose bool // add extra log message output
}
@@ -105,6 +112,7 @@ type Main struct {
Prometheus bool // enable prometheus metrics
PrometheusListen string // prometheus instance bind specification
embdEtcd *etcd.EmbdEtcd // TODO: can be an interface in the future...
ge *graph.Engine
exit *util.EasyExit // exit signal
@@ -140,7 +148,7 @@ func (obj *Main) Init() error {
obj.idealClusterSize = uint16(obj.IdealClusterSize)
if obj.IdealClusterSize < 0 { // value is undefined, set to the default
obj.idealClusterSize = etcd.DefaultIdealClusterSize
obj.idealClusterSize = chooser.DefaultIdealDynamicSize
}
if obj.idealClusterSize < 1 {
@@ -194,6 +202,7 @@ func (obj *Main) Run() error {
hello(obj.Program, obj.Version, obj.Flags) // say hello!
defer Logf("goodbye!")
exitCtx := obj.exit.Context() // local exit signal
defer obj.exit.Done(nil) // ensure this gets called even if Exit doesn't
hostname, err := os.Hostname() // a sensible default
@@ -243,13 +252,14 @@ func (obj *Main) Run() error {
if err := prom.InitKindMetrics(engine.RegisteredResourcesNames()); err != nil {
return errwrap.Wrapf(err, "can't initialize kind-specific prometheus metrics")
}
obj.cleanup = append(obj.cleanup, func() error {
defer func() {
Logf("prometheus: stopping instance")
if err := prom.Stop(); err != nil {
return errwrap.Wrapf(err, "the prometheus instance exited poorly")
err := errwrap.Wrapf(prom.Stop(), "the prometheus instance exited poorly")
if err != nil {
// TODO: cause the final exit code to be non-zero
Logf("cleanup error: %+v", err)
}
return nil
})
}()
}
if !obj.NoPgp {
@@ -296,6 +306,8 @@ func (obj *Main) Run() error {
exitchan := make(chan struct{}) // exit on close
wg := &sync.WaitGroup{} // waitgroup for inner loop & goroutines
defer wg.Wait() // wait in case we have an early exit
defer obj.exit.Done(nil) // trigger exit in case something blocks
// exit after `max-runtime` seconds for no reason at all...
if i := obj.MaxRuntime; i > 0 {
@@ -335,63 +347,108 @@ func (obj *Main) Run() error {
// XXX: should this be moved to later in the code?
go converger.Run(true) // main loop for converger, true to start paused
converger.Ready() // block until ready
obj.cleanup = append(obj.cleanup, func() error {
defer func() {
// TODO: shutdown converger, but make sure that using it in a
// still running embdEtcd struct doesn't block waiting on it...
converger.Shutdown()
return nil
})
}()
// embedded etcd
if len(obj.seeds) == 0 {
Logf("etcd: seeds: no seeds specified!")
Logf("no seeds specified!")
} else {
Logf("etcd: seeds(%d): %+v", len(obj.seeds), obj.seeds)
Logf("seeds(%d): %+v", len(obj.seeds), obj.seeds)
}
embdEtcd := etcd.NewEmbdEtcd(
hostname,
obj.seeds,
obj.clientURLs,
obj.serverURLs,
obj.advertiseClientURLs,
obj.advertiseServerURLs,
obj.NoServer,
obj.NoNetwork,
obj.idealClusterSize,
etcd.Flags{
Debug: obj.Flags.Debug,
Trace: obj.Flags.Trace,
Verbose: obj.Flags.Verbose,
},
prefix,
converger,
)
if embdEtcd == nil {
return fmt.Errorf("etcd: creation failed")
} else if err := embdEtcd.Startup(); err != nil { // startup (returns when etcd main loop is running)
return errwrap.Wrapf(err, "etcd: startup failed")
}
obj.cleanup = append(obj.cleanup, func() error {
// cleanup etcd main loop last so it can process everything first
err := embdEtcd.Destroy() // shutdown and cleanup etcd
return errwrap.Wrapf(err, "etcd: exited poorly")
})
obj.embdEtcd = &etcd.EmbdEtcd{
Hostname: hostname,
Seeds: obj.seeds,
// wait for etcd server to be ready before continuing...
// XXX: this is wrong if we're not going to be a server! we'll block!!!
// select {
// case <-embdEtcd.ServerReady():
// Logf("etcd: server: ready!")
// // pass
// case <-time.After(((etcd.MaxStartServerTimeout * etcd.MaxStartServerRetries) + 1) * time.Second):
// return fmt.Errorf("etcd: startup timeout")
// }
time.Sleep(1 * time.Second) // XXX: temporary workaround
ClientURLs: obj.clientURLs,
ServerURLs: obj.serverURLs,
AClientURLs: obj.advertiseClientURLs,
AServerURLs: obj.advertiseServerURLs,
NoServer: obj.NoServer,
NoNetwork: obj.NoNetwork,
Chooser: &chooser.DynamicSize{
IdealClusterSize: obj.idealClusterSize,
},
Converger: converger,
NS: NS, // namespace
Prefix: fmt.Sprintf("%s/", path.Join(prefix, "etcd")),
Debug: obj.Flags.Debug,
Logf: func(format string, v ...interface{}) {
log.Printf("etcd: "+format, v...)
},
}
if err := obj.embdEtcd.Init(); err != nil {
return errwrap.Wrapf(err, "etcd init failed")
}
defer func() {
// cleanup etcd main loop last so it can process everything first
err := errwrap.Wrapf(obj.embdEtcd.Close(), "etcd close failed")
if err != nil {
// TODO: cause the final exit code to be non-zero
Logf("cleanup error: %+v", err)
}
}()
var etcdErr error
// don't add a wait group here, this is done in embdEtcd.Destroy()
go func() {
etcdErr = obj.embdEtcd.Run() // returns when it shuts down...
obj.exit.Done(errwrap.Wrapf(etcdErr, "etcd run failed")) // trigger exit
}()
// tell etcd to shutdown, blocks until done!
// TODO: handle/report error?
defer obj.embdEtcd.Destroy()
// wait for etcd to be ready before continuing...
// TODO: do we need to add a timeout here?
select {
case <-obj.embdEtcd.Ready():
Logf("etcd is ready!")
// pass
case <-obj.embdEtcd.Exited():
Logf("etcd was destroyed!")
err := fmt.Errorf("etcd was destroyed on startup")
if etcdErr != nil {
err = etcdErr
}
return err
}
// TODO: should getting a client from EmbdEtcd already come with the NS?
etcdClient, err := obj.embdEtcd.MakeClientFromNamespace(NS)
if err != nil {
return errwrap.Wrapf(err, "make Client failed")
}
simpleDeploy := &deployer.SimpleDeploy{
Client: etcdClient,
Debug: obj.Flags.Debug,
Logf: func(format string, v ...interface{}) {
log.Printf("deploy: "+format, v...)
},
}
if err := simpleDeploy.Init(); err != nil {
return errwrap.Wrapf(err, "deploy Init failed")
}
defer func() {
err := errwrap.Wrapf(simpleDeploy.Close(), "deploy Close failed")
if err != nil {
// TODO: cause the final exit code to be non-zero
Logf("cleanup error: %+v", err)
}
}()
// implementation of the World API (alternatives can be substituted in)
world := &etcd.World{
Hostname: hostname,
EmbdEtcd: embdEtcd,
Client: etcdClient,
MetadataPrefix: MetadataPrefix,
StoragePrefix: StoragePrefix,
StandaloneFs: obj.DeployFs, // used for static deploys
@@ -415,9 +472,16 @@ func (obj *Main) Run() error {
}
if err := obj.ge.Init(); err != nil {
return errwrap.Wrapf(err, "engine: creation failed")
return errwrap.Wrapf(err, "engine Init failed")
}
// After this point, the inner "main loop" must run, so that the engine
defer func() {
err := errwrap.Wrapf(obj.ge.Close(), "engine Close failed")
if err != nil {
// TODO: cause the final exit code to be non-zero
Logf("cleanup error: %+v", err)
}
}()
// After this point, the inner "main loop" will run, so that the engine
// can get closed with the deploy close via the deploy chan shutdown...
// main loop logic starts here
@@ -456,7 +520,7 @@ func (obj *Main) Run() error {
obj.ge.Pause(false)
}
// must be paused before this is run
obj.ge.Close()
//obj.ge.Close() // run in defer instead
return // this is the only place we exit
}
@@ -678,9 +742,10 @@ func (obj *Main) Run() error {
// get max id (from all the previous deploys)
// this is what the existing cluster is already running
// TODO: can this block since we didn't deploy yet?
max, err := etcd.GetMaxDeployID(embdEtcd)
// TODO: add a timeout to context?
max, err := simpleDeploy.GetMaxDeployID(exitCtx)
if err != nil {
close(deployChan) // because we won't close it downstream...
return errwrap.Wrapf(err, "error getting max deploy id")
}
@@ -710,9 +775,24 @@ func (obj *Main) Run() error {
// now we can wait for future deploys, but if we already had an
// initial deploy from run, don't switch to this unless it's new
ctx, cancel := context.WithCancel(context.Background())
watchChan, err := simpleDeploy.WatchDeploy(ctx)
if err != nil {
cancel()
Logf("error starting deploy: %+v", err)
return
}
wg.Add(1)
go func() {
defer wg.Done()
defer cancel() // unblock watch deploy
select { // wait until we're ready to shutdown
case <-exitchan:
}
}()
canceled := false
var last uint64
startChan := make(chan struct{}) // start signal
close(startChan) // kick it off!
for {
if obj.NoDeployWatch && (obj.Deploy != nil || last > 0) {
// block here, because when we close the
@@ -725,29 +805,33 @@ func (obj *Main) Run() error {
}
select {
case <-startChan: // kick the loop once at start
startChan = nil // disable
case err, ok := <-etcd.WatchDeploy(embdEtcd):
// WatchDeploy should send an initial event now...
case err, ok := <-watchChan:
if !ok {
// TODO: is any of this needed in here?
if !canceled {
obj.exit.Done(nil) // regular shutdown
}
return
}
if err == context.Canceled {
canceled = true
continue // channel close is coming...
}
if err != nil {
// TODO: it broke, can we restart?
obj.exit.Done(fmt.Errorf("deploy: watch error"))
return
obj.exit.Done(errwrap.Wrapf(err, "deploy: watch error"))
continue
}
startChan = nil // disable it early...
if obj.Flags.Debug {
Logf("deploy: got activity")
}
case <-exitchan:
return
//case <-exitchan:
// return // exit via channel close instead
}
latest, err := etcd.GetMaxDeployID(embdEtcd) // or zero
latest, err := simpleDeploy.GetMaxDeployID(ctx) // or zero
if err != nil {
Logf("error getting max deploy id: %+v", err)
continue
@@ -774,7 +858,7 @@ func (obj *Main) Run() error {
// 0 passes through an empty deploy without an error...
// (unless there is some sort of etcd error that occurs)
str, err := etcd.GetDeploy(embdEtcd, latest)
str, err := simpleDeploy.GetDeploy(ctx, latest)
if err != nil {
Logf("deploy: error getting deploy: %+v", err)
continue
@@ -871,6 +955,9 @@ func (obj *Main) FastExit(err error) {
// might leave some of your resources in a partial or unknown state.
func (obj *Main) Interrupt(err error) {
// XXX: implement and run Interrupt API for supported resources
obj.FastExit(err)
if obj.embdEtcd != nil {
obj.embdEtcd.Interrupt() // unblock borked clusters
}
}
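
For reference, the embedded etcd lifecycle that Run() wires up above boils
down to: fill in the EmbdEtcd struct, Init it, launch Run in a goroutine, then
block on Ready/Exited before handing a namespaced client to the deployer and
the World implementation. A compressed sketch (struct construction, converger
and exit plumbing elided; it also mirrors the racy etcdErr read that the
commit message owns up to):

```
// Sketch of the EmbdEtcd lifecycle from Run() above, heavily condensed.
package example

import (
	"fmt"

	"github.com/purpleidea/mgmt/etcd"
	"github.com/purpleidea/mgmt/util/errwrap"
)

const ns = "/_mgmt"

func runEtcd(embdEtcd *etcd.EmbdEtcd) error {
	if err := embdEtcd.Init(); err != nil {
		return errwrap.Wrapf(err, "etcd init failed")
	}
	defer embdEtcd.Close() // cleanup the main loop last

	var etcdErr error
	go func() {
		etcdErr = embdEtcd.Run() // returns when it shuts down...
	}()

	// wait for etcd to be ready before continuing...
	select {
	case <-embdEtcd.Ready():
		// pass
	case <-embdEtcd.Exited():
		if etcdErr != nil { // racy read, as in the original
			return etcdErr
		}
		return fmt.Errorf("etcd was destroyed on startup")
	}

	etcdClient, err := embdEtcd.MakeClientFromNamespace(ns)
	if err != nil {
		return errwrap.Wrapf(err, "make Client failed")
	}
	_ = etcdClient // hand this to the deployer, the World, etc.
	return nil
}
```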

View File

@@ -175,14 +175,19 @@ func run(c *cli.Context, name string, gapiObj gapi.GAPI) error {
reterr := obj.Run()
if reterr != nil {
// log the error message returned
log.Printf("main: Error: %v", reterr)
if obj.Flags.Debug {
log.Printf("main: %+v", reterr)
}
}
if err := obj.Close(); err != nil {
log.Printf("main: Close: %v", err)
if obj.Flags.Debug {
log.Printf("main: Close: %+v", err)
}
if reterr == nil {
return err
}
reterr = errwrap.Append(reterr, err)
}
return reterr

View File

@@ -27,7 +27,6 @@ import (
// These constants are some global variables that are used throughout the code.
const (
Debug = false // add additional log messages
Trace = false // add execution flow log messages
Verbose = false // add extra log message output
)
@@ -40,7 +39,6 @@ var (
func main() {
flags := mgmt.Flags{
Debug: Debug,
Trace: Trace,
Verbose: Verbose,
}
if err := mgmt.CLI(program, version, flags); err != nil {

View File

@@ -23,17 +23,25 @@
import sys
if len(sys.argv) == 2 and sys.argv[1] != "-":
lines = open(sys.argv[1], "r").readlines()
else:
lines = sys.stdin.readlines()
print("read: %d lines" % len(lines))
# find program start
start = -1
for i in range(len(lines)):
line = lines[i]
if line.startswith("PC="):
start=i
break
if start == -1:
print("could not find program start, looking for PC=???", file=sys.stderr)
sys.exit(1)
print("starts at line: %d" % (start+1)) # +1 because we're zero based
def is_chunk(line):
@@ -59,6 +67,18 @@ def filter_chunk(chunk):
package_line = lines[1]
if package_line.startswith("github.com/purpleidea/mgmt/vendor/"):
return False
if package_line.startswith("github.com/") and not package_line.startswith("github.com/purpleidea/mgmt/"):
return False
if package_line.startswith("internal/poll"):
return False
if package_line.startswith("context.propagateCancel"):
return False
if package_line.startswith("runtime.gopark"):
return False
if package_line.startswith("runtime.futex"):
return False
if package_line.startswith("os/signal.signal_recv"):
return False
return True

View File

@@ -10,7 +10,7 @@ if ! command -v etcdctl >/dev/null; then
exit 0
fi
mkdir /tmp/mgmt/{A..E}
#mkdir /tmp/mgmt/{A..E}
# kill servers on error/exit
trap 'pkill -9 mgmt' EXIT
@@ -22,7 +22,7 @@ $TIMEOUT "$MGMT" run --hostname h3 --tmp-prefix --no-pgp --seeds http://127.0.0.
# wait for everything to converge
sleep 30s
ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/idealClusterSize 3
ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 3
$TIMEOUT "$MGMT" run --hostname h4 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 empty &
$TIMEOUT "$MGMT" run --hostname h5 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2387 --server-urls http://127.0.0.1:2388 empty &
@@ -32,7 +32,7 @@ sleep 30s
test "$(ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 member list | wc -l)" -eq 3
ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 put /_mgmt/idealClusterSize 5
ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 5
# wait for everything to converge
sleep 30s

View File

@@ -0,0 +1,27 @@
#!/bin/bash
. "$(dirname "$0")/../util.sh"
# run empty graphs, we're just testing etcd clustering
$TIMEOUT "$MGMT" run --no-pgp --tmp-prefix empty &
pid1=$!
sleep 15s # let it startup
# run a second one that should conflict because a server is already running...
$TIMEOUT "$MGMT" run --no-pgp --tmp-prefix empty &
pid2=$!
wait $pid2
e=$?
if [ $e -eq 0 ]; then
echo "second mgmt exited successfully when error was expected"
exit 1
fi
if [ $e -ne 1 ]; then
echo "second mgmt exited with unexpected error of $e"
exit $e
fi
$(kill -SIGINT $pid1)& # send ^C to exit 1st mgmt
wait $pid1 # get exit status
# if pid1 exits because of a timeout, then it blocked, and this is a bug!
exit $?

View File

@@ -0,0 +1,35 @@
#!/bin/bash -e
. "$(dirname "$0")/../util.sh"
# run empty graphs, we're just testing etcd clustering
$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix empty &
pid1=$!
sleep 15s # let it startup
$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix empty &
pid2=$!
sleep 15s
$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix empty &
pid3=$!
sleep 15s
$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt (reversed!)
wait $pid1
e=$?
if [ $e -ne 0 ]; then
exit $e
fi
$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt
wait $pid2
e=$?
if [ $e -ne 0 ]; then
exit $e
fi
$(sleep 15s && kill -SIGINT $pid3)& # send ^C to exit 3rd mgmt (reversed!)
wait $pid3 # get exit status
# if pid3 exits because of a timeout, then it blocked, and this is a bug!
exit $?

View File

@@ -0,0 +1,24 @@
#!/bin/bash -e
. "$(dirname "$0")/../util.sh"
# run empty graphs, we're just testing etcd clustering
$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix empty &
pid1=$!
sleep 15s # let it startup
$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix empty &
pid2=$!
sleep 15s
$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit 1st mgmt! (reverse!)
wait $pid1
e=$?
if [ $e -ne 0 ]; then
exit $e
fi
$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit 2nd mgmt (reverse!)
wait $pid2 # get exit status
# if pid2 exits because of a timeout, then it blocked, and this is a bug!
exit $?

View File

@@ -5,18 +5,58 @@
set -o errexit
set -o pipefail
$TIMEOUT "$MGMT" run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang exchange0.mcl &
$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang exchange0.mcl &
$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang exchange0.mcl &
$TIMEOUT "$MGMT" run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang exchange0.mcl &
$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix --no-pgp empty &
pid1=$!
sleep 10s
$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty &
pid2=$!
sleep 10s
$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty &
pid3=$!
sleep 10s
$TIMEOUT "$MGMT" run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty &
pid4=$!
sleep 10s
$TIMEOUT "$MGMT" deploy --no-git --seeds http://127.0.0.1:2379 lang --lang exchange0.mcl
# kill servers on error/exit
trap 'pkill -9 mgmt' EXIT
#trap 'pkill -9 mgmt' EXIT
# wait for everything to converge
sleep 10s
sleep 15s
# debug
tail /tmp/mgmt/exchange-*
test "$(cat /tmp/mgmt/exchange-* | grep -c h1)" -eq 4
test "$(cat /tmp/mgmt/exchange-* | grep -c h2)" -eq 4
test "$(cat /tmp/mgmt/exchange-* | grep -c h3)" -eq 4
test "$(cat /tmp/mgmt/exchange-* | grep -c h4)" -eq 4
$(sleep 15s && kill -SIGINT $pid4)& # send ^C to exit mgmt...
wait $pid4
e=$?
if [ $e -ne 0 ]; then
exit $e
fi
$(sleep 15s && kill -SIGINT $pid3)& # send ^C to exit mgmt...
wait $pid3
e=$?
if [ $e -ne 0 ]; then
exit $e
fi
$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit mgmt...
wait $pid2
e=$?
if [ $e -ne 0 ]; then
exit $e
fi
$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit mgmt...
wait $pid1
e=$?
if [ $e -ne 0 ]; then
exit $e
fi

View File

@@ -1,9 +1,10 @@
# run this example with these commands
# watch -n 0.1 'tail *' # run this in /tmp/mgmt/
# time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
# time ./mgmt run --hostname h1 --tmp-prefix --no-pgp empty
# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty
# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty
# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty
# time ./mgmt deploy --no-git --seeds http://127.0.0.1:2379 lang --lang examples/lang/exchange0.mcl
import "sys"
import "world"

View File

@@ -65,6 +65,9 @@ function consistent-imports() {
if grep $'\t"github.com/purpleidea/mgmt/engine/util"' "$1"; then # import as engineUtil
return 1
fi
if grep '"golang.org/x/net/context"' "$1"; then # use built-in context
return 1
fi
}
# run go vet on a per-package basis

View File

@@ -86,7 +86,7 @@ func TestEasyAckOnce2(t *testing.T) {
}
}
func ExampleSubscribeSync() {
func ExampleSubscribedSignal() {
fmt.Println("hello")
x := &SubscribedSignal{}

View File

@@ -430,6 +430,21 @@ func TimeAfterOrBlockCtx(ctx context.Context, t int) <-chan struct{} {
return ch
}
// CloseAfter takes a duration, similarly to `time.After`, and returns a channel
// that closes when either the context is done, or the duration expires.
func CloseAfter(ctx context.Context, d time.Duration) <-chan struct{} {
ch := make(chan struct{})
go func() {
defer close(ch)
select {
case <-time.After(d):
// done
case <-ctx.Done():
}
}()
return ch
}
// SystemBusPrivateUsable makes using the private bus usable.
// TODO: should be upstream: https://github.com/godbus/dbus/issues/15
func SystemBusPrivateUsable() (conn *dbus.Conn, err error) {
@@ -468,6 +483,26 @@ func SessionBusPrivateUsable() (conn *dbus.Conn, err error) {
return conn, nil // success
}
// PriorityStrSliceSort moves any elements matching fn to the end of the list.
// Negate the result of fn to move matching elements to the front instead!
// A copy of the list is returned; the original is not modified.
func PriorityStrSliceSort(input []string, fn func(string) bool) []string {
output := []string{}
found := []string{}
for _, x := range input {
if fn(x) { // if we find the key, don't include it just yet
found = append(found, x) // save for later
continue
}
output = append(output, x)
}
// include the keys at the end (if found)
output = append(output, found...)
return output
}
// SortedStrSliceCompare takes two lists of strings and returns whether or not
// they are equivalent. It will return nil if both sets contain the same
// elements, regardless of order, and an error if they do not.
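
Both new util helpers above are small enough to show in use. A hypothetical
sketch (the import path for the util package and the endpoint strings are
assumptions):

```
// Sketch: exercising CloseAfter and PriorityStrSliceSort from above.
package main

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/purpleidea/mgmt/util"
)

func main() {
	// CloseAfter behaves like a cancellable time.After...
	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		time.Sleep(100 * time.Millisecond)
		cancel() // give up early
	}()
	<-util.CloseAfter(ctx, 5*time.Second) // closes after ~100ms, not 5s
	fmt.Println("done waiting")

	// PriorityStrSliceSort pushes matching elements to the end of the list.
	in := []string{"h2:2379", "h1:2379", "h2:2380"} // made-up endpoints
	isH2 := func(s string) bool { return strings.HasPrefix(s, "h2") }
	fmt.Println(util.PriorityStrSliceSort(in, isH2))
	// prints: [h1:2379 h2:2379 h2:2380]
}
```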

View File

@@ -22,6 +22,7 @@ package util
import (
"reflect"
"sort"
"strings"
"testing"
)
@@ -1014,6 +1015,76 @@ func TestRemovePathPrefix0(t *testing.T) {
}
}
func TestPriorityStrSliceSort0(t *testing.T) {
in := []string{"foo", "bar", "baz"}
ex := []string{"bar", "baz", "foo"}
fn := func(x string) bool {
return x == "foo"
}
out := PriorityStrSliceSort(in, fn)
if !reflect.DeepEqual(ex, out) {
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
}
}
func TestPriorityStrSliceSort1(t *testing.T) {
in := []string{"foo", "bar", "baz"}
ex := []string{"bar", "foo", "baz"}
fn := func(x string) bool {
return x != "bar" // != brings this key to the front
}
out := PriorityStrSliceSort(in, fn)
if !reflect.DeepEqual(ex, out) {
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
}
}
func TestPriorityStrSliceSort2(t *testing.T) {
in := []string{"bar", "foo", "bar", "bar", "baz"}
ex := []string{"foo", "baz", "bar", "bar", "bar"}
fn := func(x string) bool {
return x == "bar"
}
out := PriorityStrSliceSort(in, fn)
if !reflect.DeepEqual(ex, out) {
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
}
}
func TestPriorityStrSliceSort3(t *testing.T) {
in := []string{"foo", "bar1", "bar2", "bar3", "baz"}
ex := []string{"bar1", "bar2", "bar3", "foo", "baz"}
fn := func(x string) bool {
return !strings.HasPrefix(x, "bar")
}
out := PriorityStrSliceSort(in, fn)
if !reflect.DeepEqual(ex, out) {
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
}
}
func TestPriorityStrSliceSort4(t *testing.T) {
in := []string{"foo", "bar1", "bar2", "bar3", "baz"}
ex := []string{"foo", "baz", "bar1", "bar2", "bar3"}
fn := func(x string) bool {
return strings.HasPrefix(x, "bar")
}
out := PriorityStrSliceSort(in, fn)
if !reflect.DeepEqual(ex, out) {
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
}
}
func TestSortedStrSliceCompare0(t *testing.T) {
slice0 := []string{"foo", "bar", "baz"}
slice1 := []string{"bar", "foo", "baz"}

View File

@@ -18,6 +18,7 @@
package yamlgraph
import (
"context"
"fmt"
"sync"
@@ -166,6 +167,10 @@ func (obj *GAPI) Next() chan gapi.Next {
ch <- next
return
}
// FIXME: add timeout to context
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
startChan := make(chan struct{}) // start signal
close(startChan) // kick it off!
@@ -173,7 +178,16 @@ func (obj *GAPI) Next() chan gapi.Next {
if obj.data.NoStreamWatch {
watchChan = nil
} else {
watchChan = obj.data.World.ResWatch()
var err error
watchChan, err = obj.data.World.ResWatch(ctx)
if err != nil {
next := gapi.Next{
Err: errwrap.Wrapf(err, "%s: could not start watch", Name),
Exit: true, // exit, b/c programming error?
}
ch <- next
return
}
}
for {

View File

@@ -19,6 +19,7 @@
package yamlgraph
import (
"context"
"fmt"
"strings"
@@ -168,6 +169,7 @@ func (obj *GraphConfig) Parse(data []byte) error {
// NewGraphFromConfig transforms a GraphConfig struct into a new graph.
// FIXME: remove any possibly left over, now obsolete graph diff code from here!
// TODO: add a timeout to replace context.TODO()
func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World, noop bool) (*pgraph.Graph, error) {
// hostname is the uuid for the host
@@ -224,7 +226,7 @@ func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World,
}
// store in backend (usually etcd)
if err := world.ResExport(resourceList); err != nil {
if err := world.ResExport(context.TODO(), resourceList); err != nil {
return nil, fmt.Errorf("Config: Could not export resources: %v", err)
}
@@ -239,7 +241,7 @@ func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World,
// database changes, we don't have a partial state of affairs...
if len(kindFilter) > 0 { // if kindFilter is empty, don't need to do lookups!
var err error
resourceList, err = world.ResCollect(hostnameFilter, kindFilter)
resourceList, err = world.ResCollect(context.TODO(), hostnameFilter, kindFilter)
if err != nil {
return nil, fmt.Errorf("Config: Could not collect resources: %v", err)
}