etcd: Rewrite embed etcd implementation
This is a giant cleanup of the etcd code. The earlier version was written when I was less experienced with golang. This is still not perfect, and does contain some races, but at least it's a decent base to start from. The automatic elastic clustering should be considered an experimental feature. If you need a more battle-tested cluster, then you should manage etcd manually and point mgmt at your existing cluster.
This commit is contained in:
24
docs/faq.md
24
docs/faq.md
@@ -215,23 +215,25 @@ requires a number of seconds as an argument.
|
||||
./mgmt run lang --lang examples/lang/hello0.mcl --converged-timeout=5
|
||||
```
|
||||
|
||||
### What does the error message about an inconsistent dataDir mean?
|
||||
### On startup `mgmt` hangs after: `etcd: server: starting...`.
|
||||
|
||||
If you get an error message similar to:
|
||||
|
||||
```
|
||||
Etcd: Connect: CtxError...
|
||||
Etcd: CtxError: Reason: CtxDelayErr(5s): No endpoints available yet!
|
||||
Etcd: Connect: Endpoints: []
|
||||
Etcd: The dataDir (/var/lib/mgmt/etcd) might be inconsistent or corrupt.
|
||||
etcd: server: starting...
|
||||
etcd: server: start timeout of 1m0s reached
|
||||
etcd: server: close timeout of 15s reached
|
||||
```
|
||||
|
||||
This happens when there are a series of fatal connect errors in a row. This can
|
||||
happen when you start `mgmt` using a dataDir that doesn't correspond to the
|
||||
current cluster view. As a result, the embedded etcd server never finishes
|
||||
starting up, and as a result, a default endpoint never gets added. The solution
|
||||
is to either reconcile the mistake, and if there is no important data saved, you
|
||||
can remove the etcd dataDir. This is typically `/var/lib/mgmt/etcd/member/`.
|
||||
and nothing happens afterwards, then this can be due to a corrupt etcd storage
|
||||
directory. Each etcd server embedded in mgmt must have a special directory where
|
||||
it stores local state. It must not be shared by more than one individual member.
|
||||
This dir is typically `/var/lib/mgmt/etcd/member/`. If you accidentally use it
|
||||
(for example during testing) with a different cluster view, then you can corrupt
|
||||
it. This can happen if you use it with more than one different hostname.
|
||||
|
||||
The solution is to avoid making this mistake, and if there is no important data
|
||||
saved, you can remove the etcd member dir and start over.
|
||||
|
||||
### On running `make` to build a new version, it errors with: `Text file busy`.
|
||||
|
||||
|
||||
@@ -62,6 +62,13 @@ type Engine struct {
|
||||
// If the struct does not validate, or it cannot initialize, then this errors.
|
||||
// Initially it will contain an empty graph.
|
||||
func (obj *Engine) Init() error {
|
||||
if obj.Program == "" {
|
||||
return fmt.Errorf("the Program is empty")
|
||||
}
|
||||
if obj.Hostname == "" {
|
||||
return fmt.Errorf("the Hostname is empty")
|
||||
}
|
||||
|
||||
var err error
|
||||
if obj.graph, err = pgraph.NewGraph("graph"); err != nil {
|
||||
return err
|
||||
|
||||
250
engine/resources/config_etcd.go
Normal file
250
engine/resources/config_etcd.go
Normal file
@@ -0,0 +1,250 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package resources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/purpleidea/mgmt/engine"
|
||||
"github.com/purpleidea/mgmt/engine/traits"
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
)
|
||||
|
||||
func init() {
|
||||
engine.RegisterResource("config:etcd", func() engine.Res { return &ConfigEtcdRes{} })
|
||||
}
|
||||
|
||||
// sizeCheckApplyTimeout bounds how long a single sizeCheckApply run may spend
// talking to the cluster before its context expires.
const sizeCheckApplyTimeout = 5 * time.Second
|
||||
|
||||
// ConfigEtcdRes is a resource that sets mgmt's etcd configuration.
|
||||
type ConfigEtcdRes struct {
|
||||
traits.Base // add the base methods without re-implementation
|
||||
|
||||
init *engine.Init
|
||||
|
||||
// IdealClusterSize is the requested minimum size of the cluster. If you
|
||||
// set this to zero, it will cause a cluster wide shutdown if
|
||||
// AllowSizeShutdown is true. If it's not true, then it will cause a
|
||||
// validation error.
|
||||
IdealClusterSize uint16 `lang:"idealclustersize"`
|
||||
// AllowSizeShutdown is a required safety flag that you must set to true
|
||||
// if you want to allow causing a cluster shutdown by setting
|
||||
// IdealClusterSize to zero.
|
||||
AllowSizeShutdown bool `lang:"allow_size_shutdown"`
|
||||
|
||||
// sizeFlag determines whether sizeCheckApply already ran or not.
|
||||
sizeFlag bool
|
||||
|
||||
interruptChan chan struct{}
|
||||
wg *sync.WaitGroup
|
||||
}
|
||||
|
||||
// Default returns some sensible defaults for this resource.
|
||||
func (obj *ConfigEtcdRes) Default() engine.Res {
|
||||
return &ConfigEtcdRes{}
|
||||
}
|
||||
|
||||
// Validate if the params passed in are valid data.
|
||||
func (obj *ConfigEtcdRes) Validate() error {
|
||||
if obj.IdealClusterSize < 0 {
|
||||
return fmt.Errorf("the IdealClusterSize param must be positive")
|
||||
}
|
||||
|
||||
if obj.IdealClusterSize == 0 && !obj.AllowSizeShutdown {
|
||||
return fmt.Errorf("the IdealClusterSize can't be zero if AllowSizeShutdown is false")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Init runs some startup code for this resource.
|
||||
func (obj *ConfigEtcdRes) Init(init *engine.Init) error {
|
||||
obj.init = init // save for later
|
||||
|
||||
obj.interruptChan = make(chan struct{})
|
||||
obj.wg = &sync.WaitGroup{}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close is run by the engine to clean up after the resource is done.
|
||||
func (obj *ConfigEtcdRes) Close() error {
|
||||
obj.wg.Wait() // bonus
|
||||
return nil
|
||||
}
|
||||
|
||||
// Watch is the primary listener for this resource and it outputs events.
|
||||
func (obj *ConfigEtcdRes) Watch() error {
|
||||
obj.wg.Add(1)
|
||||
defer obj.wg.Done()
|
||||
// FIXME: add timeout to context
|
||||
// The obj.init.Done channel is closed by the engine to signal shutdown.
|
||||
ctx, cancel := util.ContextWithCloser(context.Background(), obj.init.Done)
|
||||
defer cancel()
|
||||
ch, err := obj.init.World.IdealClusterSizeWatch(util.CtxWithWg(ctx, obj.wg))
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "could not watch ideal cluster size")
|
||||
}
|
||||
|
||||
obj.init.Running() // when started, notify engine that we're running
|
||||
|
||||
Loop:
|
||||
for {
|
||||
select {
|
||||
case event, ok := <-ch:
|
||||
if !ok {
|
||||
break Loop
|
||||
}
|
||||
if obj.init.Debug {
|
||||
obj.init.Logf("event: %+v", event)
|
||||
}
|
||||
// pass through and send an event
|
||||
|
||||
case <-obj.init.Done: // closed by the engine to signal shutdown
|
||||
}
|
||||
|
||||
obj.init.Event() // notify engine of an event (this can block)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// sizeCheckApply sets the IdealClusterSize parameter. If it sees a value change
|
||||
// to zero, then it *won't* try and change it away from zero, because it assumes
|
||||
// that someone has requested a shutdown. If the value is seen on first startup,
|
||||
// then it will change it, because it might be a zero from the previous cluster.
|
||||
func (obj *ConfigEtcdRes) sizeCheckApply(apply bool) (bool, error) {
|
||||
wg := &sync.WaitGroup{}
|
||||
defer wg.Wait() // this must be above the defer cancel() call
|
||||
ctx, cancel := context.WithTimeout(context.Background(), sizeCheckApplyTimeout)
|
||||
defer cancel()
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
select {
|
||||
case <-obj.interruptChan:
|
||||
cancel()
|
||||
case <-ctx.Done():
|
||||
// let this exit
|
||||
}
|
||||
}()
|
||||
|
||||
val, err := obj.init.World.IdealClusterSizeGet(ctx)
|
||||
if err != nil {
|
||||
return false, errwrap.Wrapf(err, "could not get ideal cluster size")
|
||||
}
|
||||
|
||||
// if we got a value of zero, and we've already run before, then it's ok
|
||||
if obj.IdealClusterSize != 0 && val == 0 && obj.sizeFlag {
|
||||
obj.init.Logf("impending cluster shutdown, not setting ideal cluster size")
|
||||
return true, nil // impending shutdown, don't try and cancel it.
|
||||
}
|
||||
obj.sizeFlag = true
|
||||
|
||||
// must be done after setting the above flag
|
||||
if obj.IdealClusterSize == val { // state is correct
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if !apply {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// set!
|
||||
// This is run as a transaction so we detect if we needed to change it.
|
||||
changed, err := obj.init.World.IdealClusterSizeSet(ctx, obj.IdealClusterSize)
|
||||
if err != nil {
|
||||
return false, errwrap.Wrapf(err, "could not set ideal cluster size")
|
||||
}
|
||||
if !changed {
|
||||
return true, nil // we lost a race, which means no change needed
|
||||
}
|
||||
obj.init.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// CheckApply method for Noop resource. Does nothing, returns happy!
|
||||
func (obj *ConfigEtcdRes) CheckApply(apply bool) (bool, error) {
|
||||
checkOK := true
|
||||
|
||||
if c, err := obj.sizeCheckApply(apply); err != nil {
|
||||
return false, err
|
||||
} else if !c {
|
||||
checkOK = false
|
||||
}
|
||||
|
||||
// TODO: add more config settings management here...
|
||||
//if c, err := obj.TODOCheckApply(apply); err != nil {
|
||||
// return false, err
|
||||
//} else if !c {
|
||||
// checkOK = false
|
||||
//}
|
||||
|
||||
return checkOK, nil // w00t
|
||||
}
|
||||
|
||||
// Cmp compares two resources and returns an error if they are not equivalent.
|
||||
func (obj *ConfigEtcdRes) Cmp(r engine.Res) error {
|
||||
// we can only compare ConfigEtcdRes to others of the same resource kind
|
||||
res, ok := r.(*ConfigEtcdRes)
|
||||
if !ok {
|
||||
return fmt.Errorf("not a %s", obj.Kind())
|
||||
}
|
||||
|
||||
if obj.IdealClusterSize != res.IdealClusterSize {
|
||||
return fmt.Errorf("the IdealClusterSize param differs")
|
||||
}
|
||||
if obj.AllowSizeShutdown != res.AllowSizeShutdown {
|
||||
return fmt.Errorf("the AllowSizeShutdown param differs")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Interrupt is called to ask the execution of this resource to end early.
|
||||
func (obj *ConfigEtcdRes) Interrupt() error {
|
||||
close(obj.interruptChan)
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalYAML is the custom unmarshal handler for this struct.
|
||||
// It is primarily useful for setting the defaults.
|
||||
func (obj *ConfigEtcdRes) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
type rawRes ConfigEtcdRes // indirection to avoid infinite recursion
|
||||
|
||||
def := obj.Default() // get the default
|
||||
res, ok := def.(*ConfigEtcdRes) // put in the right format
|
||||
if !ok {
|
||||
return fmt.Errorf("could not convert to ConfigEtcdRes")
|
||||
}
|
||||
raw := rawRes(*res) // convert; the defaults go here
|
||||
|
||||
if err := unmarshal(&raw); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*obj = ConfigEtcdRes(raw) // restore from indirection with type conversion!
|
||||
return nil
|
||||
}
|
||||
@@ -18,11 +18,15 @@
|
||||
package resources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/purpleidea/mgmt/engine"
|
||||
"github.com/purpleidea/mgmt/engine/traits"
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
)
|
||||
|
||||
@@ -39,6 +43,10 @@ const (
|
||||
SkipCmpStyleString
|
||||
)
|
||||
|
||||
// kvCheckApplyTimeout bounds how long a single KVRes CheckApply run may spend
// talking to the cluster before its context expires.
const kvCheckApplyTimeout = 5 * time.Second
|
||||
|
||||
// KVRes is a resource which writes a key/value pair into cluster wide storage.
|
||||
// It will ensure that the key is set to the requested value. The one exception
|
||||
// is that if you use the SkipLessThan parameter, then it will only replace the
|
||||
@@ -67,6 +75,8 @@ type KVRes struct {
|
||||
// the value is greater when using the SkipLessThan parameter.
|
||||
SkipCmpStyle KVResSkipCmpStyle `lang:"skipcmpstyle" yaml:"skipcmpstyle"`
|
||||
|
||||
interruptChan chan struct{}
|
||||
|
||||
// TODO: does it make sense to have different backends here? (eg: local)
|
||||
}
|
||||
|
||||
@@ -107,6 +117,8 @@ func (obj *KVRes) Validate() error {
|
||||
func (obj *KVRes) Init(init *engine.Init) error {
|
||||
obj.init = init // save for later
|
||||
|
||||
obj.interruptChan = make(chan struct{})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -117,9 +129,17 @@ func (obj *KVRes) Close() error {
|
||||
|
||||
// Watch is the primary listener for this resource and it outputs events.
|
||||
func (obj *KVRes) Watch() error {
|
||||
obj.init.Running() // when started, notify engine that we're running
|
||||
// FIXME: add timeout to context
|
||||
// The obj.init.Done channel is closed by the engine to signal shutdown.
|
||||
ctx, cancel := util.ContextWithCloser(context.Background(), obj.init.Done)
|
||||
defer cancel()
|
||||
|
||||
ch := obj.init.World.StrMapWatch(obj.getKey()) // get possible events!
|
||||
ch, err := obj.init.World.StrMapWatch(ctx, obj.getKey()) // get possible events!
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
obj.init.Running() // when started, notify engine that we're running
|
||||
|
||||
var send = false // send event?
|
||||
for {
|
||||
@@ -191,13 +211,28 @@ func (obj *KVRes) lessThanCheck(value string) (bool, error) {
|
||||
func (obj *KVRes) CheckApply(apply bool) (bool, error) {
|
||||
obj.init.Logf("CheckApply(%t)", apply)
|
||||
|
||||
wg := &sync.WaitGroup{}
|
||||
defer wg.Wait() // this must be above the defer cancel() call
|
||||
ctx, cancel := context.WithTimeout(context.Background(), kvCheckApplyTimeout)
|
||||
defer cancel()
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
select {
|
||||
case <-obj.interruptChan:
|
||||
cancel()
|
||||
case <-ctx.Done():
|
||||
// let this exit
|
||||
}
|
||||
}()
|
||||
|
||||
if val, exists := obj.init.Recv()["Value"]; exists && val.Changed {
|
||||
// if we received on Value, and it changed, wooo, nothing to do.
|
||||
obj.init.Logf("CheckApply: `Value` was updated!")
|
||||
}
|
||||
|
||||
hostname := obj.init.Hostname // me
|
||||
keyMap, err := obj.init.World.StrMapGet(obj.getKey())
|
||||
keyMap, err := obj.init.World.StrMapGet(ctx, obj.getKey())
|
||||
if err != nil {
|
||||
return false, errwrap.Wrapf(err, "check error during StrGet")
|
||||
}
|
||||
@@ -217,7 +252,7 @@ func (obj *KVRes) CheckApply(apply bool) (bool, error) {
|
||||
return true, nil // nothing to delete, we're good!
|
||||
|
||||
} else if ok && obj.Value == nil { // delete
|
||||
err := obj.init.World.StrMapDel(obj.getKey())
|
||||
err := obj.init.World.StrMapDel(ctx, obj.getKey())
|
||||
return false, errwrap.Wrapf(err, "apply error during StrDel")
|
||||
}
|
||||
|
||||
@@ -225,7 +260,7 @@ func (obj *KVRes) CheckApply(apply bool) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := obj.init.World.StrMapSet(obj.getKey(), *obj.Value); err != nil {
|
||||
if err := obj.init.World.StrMapSet(ctx, obj.getKey(), *obj.Value); err != nil {
|
||||
return false, errwrap.Wrapf(err, "apply error during StrSet")
|
||||
}
|
||||
|
||||
@@ -261,6 +296,12 @@ func (obj *KVRes) Cmp(r engine.Res) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Interrupt is called to ask the execution of this resource to end early.
|
||||
func (obj *KVRes) Interrupt() error {
|
||||
close(obj.interruptChan)
|
||||
return nil
|
||||
}
|
||||
|
||||
// KVUID is the UID struct for KVRes.
|
||||
type KVUID struct {
|
||||
engine.BaseUID
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/scheduler"
|
||||
)
|
||||
|
||||
@@ -25,22 +27,26 @@ import (
|
||||
// the GAPI to store state and exchange information throughout the cluster. It
|
||||
// is the interface each machine uses to communicate with the rest of the world.
|
||||
type World interface { // TODO: is there a better name for this interface?
|
||||
ResWatch() chan error
|
||||
ResExport([]Res) error
|
||||
ResWatch(context.Context) (chan error, error)
|
||||
ResExport(context.Context, []Res) error
|
||||
// FIXME: should this method take a "filter" data struct instead of many args?
|
||||
ResCollect(hostnameFilter, kindFilter []string) ([]Res, error)
|
||||
ResCollect(ctx context.Context, hostnameFilter, kindFilter []string) ([]Res, error)
|
||||
|
||||
StrWatch(namespace string) chan error
|
||||
IdealClusterSizeWatch(context.Context) (chan error, error)
|
||||
IdealClusterSizeGet(context.Context) (uint16, error)
|
||||
IdealClusterSizeSet(context.Context, uint16) (bool, error)
|
||||
|
||||
StrWatch(ctx context.Context, namespace string) (chan error, error)
|
||||
StrIsNotExist(error) bool
|
||||
StrGet(namespace string) (string, error)
|
||||
StrSet(namespace, value string) error
|
||||
StrDel(namespace string) error
|
||||
StrGet(ctx context.Context, namespace string) (string, error)
|
||||
StrSet(ctx context.Context, namespace, value string) error
|
||||
StrDel(ctx context.Context, namespace string) error
|
||||
|
||||
// XXX: add the exchange primitives in here directly?
|
||||
StrMapWatch(namespace string) chan error
|
||||
StrMapGet(namespace string) (map[string]string, error)
|
||||
StrMapSet(namespace, value string) error
|
||||
StrMapDel(namespace string) error
|
||||
StrMapWatch(ctx context.Context, namespace string) (chan error, error)
|
||||
StrMapGet(ctx context.Context, namespace string) (map[string]string, error)
|
||||
StrMapSet(ctx context.Context, namespace, value string) error
|
||||
StrMapDel(ctx context.Context, namespace string) error
|
||||
|
||||
Scheduler(namespace string, opts ...scheduler.Option) (*scheduler.Result, error)
|
||||
|
||||
|
||||
497
etcd/callback.go
Normal file
497
etcd/callback.go
Normal file
@@ -0,0 +1,497 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
)
|
||||
|
||||
// nominateApply applies the changed watcher data onto our local caches.
|
||||
func (obj *EmbdEtcd) nominateApply(data *interfaces.WatcherData) error {
|
||||
if data == nil { // ignore empty data
|
||||
return nil
|
||||
}
|
||||
|
||||
// If we tried to lookup the nominated members here (in etcd v3) this
|
||||
// would sometimes block because we would lose the cluster leader once
|
||||
// the current leader calls the MemberAdd API and it steps down trying
|
||||
// to form a two host cluster. Instead, we can look at the event
|
||||
// response data to read the nominated values! Since we only see what
|
||||
// has *changed* in the response data, we have to keep track of the
|
||||
// original state and apply the deltas. This must be idempotent in case
|
||||
// it errors and is called again. If we're retrying and we get a data
|
||||
// format error, it's probably not the end of the world.
|
||||
nominated, err := applyDeltaEvents(data, obj.nominated) // map[hostname]URLs (URLsMap)
|
||||
if err != nil && err != errInconsistentApply { // allow missing deletes
|
||||
return err // unexpected error, fail
|
||||
}
|
||||
// TODO: do we want to sort this if it becomes a list instead of a map?
|
||||
//sort.Strings(nominated) // deterministic order
|
||||
obj.nominated = nominated
|
||||
return nil
|
||||
}
|
||||
|
||||
// volunteerApply applies the changed watcher data onto our local caches.
|
||||
func (obj *EmbdEtcd) volunteerApply(data *interfaces.WatcherData) error {
|
||||
if data == nil { // ignore empty data
|
||||
return nil
|
||||
}
|
||||
volunteers, err := applyDeltaEvents(data, obj.volunteers) // map[hostname]URLs (URLsMap)
|
||||
if err != nil && err != errInconsistentApply { // allow missing deletes
|
||||
return err // unexpected error, fail
|
||||
}
|
||||
// TODO: do we want to sort this if it becomes a list instead of a map?
|
||||
//sort.Strings(volunteers) // deterministic order
|
||||
obj.volunteers = volunteers
|
||||
return nil
|
||||
}
|
||||
|
||||
// endpointApply applies the changed watcher data onto our local caches. In this
|
||||
// particular apply function, it also sets our client with the new endpoints.
|
||||
func (obj *EmbdEtcd) endpointApply(data *interfaces.WatcherData) error {
|
||||
if data == nil { // ignore empty data
|
||||
return nil
|
||||
}
|
||||
endpoints, err := applyDeltaEvents(data, obj.endpoints) // map[hostname]URLs (URLsMap)
|
||||
if err != nil && err != errInconsistentApply { // allow missing deletes
|
||||
return err // unexpected error, fail
|
||||
}
|
||||
|
||||
// is the endpoint list different?
|
||||
if err := cmpURLsMap(obj.endpoints, endpoints); err != nil {
|
||||
obj.endpoints = endpoints // set
|
||||
// can happen if a server drops out for example
|
||||
obj.Logf("endpoint list changed to: %+v", endpoints)
|
||||
obj.setEndpoints()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// nominateCb runs to respond to the nomination list change events.
|
||||
// Functionally, it controls the starting and stopping of the server process. If
|
||||
// a nominate message is received for this machine, then it means it is already
|
||||
// being added to the cluster with member add and the cluster is now waiting for
|
||||
// it to start up. When a nominate entry is removed, it's up to this function to
|
||||
// run the member remove right before it shuts its server down.
|
||||
func (obj *EmbdEtcd) nominateCb(ctx context.Context) error {
|
||||
// Ensure that only one copy of this function is run simultaneously.
|
||||
// This is because we don't want to cause runServer to race with
|
||||
// destroyServer. Let us completely start up before we can cancel it. As
|
||||
// a special case, destroyServer itself can race against itself. I don't
|
||||
// think it's possible for contention on this mutex, but we'll leave it
|
||||
// in for safety.
|
||||
obj.nominatedMutex.Lock()
|
||||
defer obj.nominatedMutex.Unlock()
|
||||
// This ordering mutex is being added for safety, since there is no good
|
||||
// reason for this function and volunteerCb to run simultaneously, and
|
||||
// it might be preventing a race condition that was happening.
|
||||
obj.orderingMutex.Lock()
|
||||
defer obj.orderingMutex.Unlock()
|
||||
if obj.Debug {
|
||||
obj.Logf("nominateCb")
|
||||
defer obj.Logf("nominateCb: done!")
|
||||
}
|
||||
|
||||
// check if i have actually volunteered first of all...
|
||||
if obj.NoServer || len(obj.ServerURLs) == 0 {
|
||||
obj.Logf("inappropriately nominated, rogue or stale server?")
|
||||
// TODO: should we un-nominate ourself?
|
||||
return nil // we've done our job successfully
|
||||
}
|
||||
|
||||
// This can happen when we're shutting down, build the nominated value.
|
||||
if len(obj.nominated) == 0 {
|
||||
obj.Logf("list of nominations is empty")
|
||||
//return nil // don't exit, we might want to shutdown the server
|
||||
} else {
|
||||
obj.Logf("nominated: %v", obj.nominated)
|
||||
}
|
||||
|
||||
// if there are no other peers, we create a new server
|
||||
// TODO: do we need an || len(obj.nominated) == 0 if we're the first?
|
||||
_, exists := obj.nominated[obj.Hostname] // am i nominated?
|
||||
newCluster := len(obj.nominated) == 1 && exists
|
||||
if obj.Debug {
|
||||
obj.Logf("nominateCb: newCluster: %t; exists: %t; obj.server == nil: %t", newCluster, exists, obj.server == nil)
|
||||
}
|
||||
|
||||
// TODO: server start retries should be handled inside of runServer...
|
||||
if obj.serverAction(serverActionStart) { // start
|
||||
// no server is running, but it should be
|
||||
wg := &sync.WaitGroup{}
|
||||
serverReady, ackReady := obj.ServerReady() // must call ack!
|
||||
serverExited, ackExited := obj.ServerExited() // must call ack!
|
||||
|
||||
var sendError = false
|
||||
var serverErr error
|
||||
obj.Logf("waiting for server...")
|
||||
nominated, err := copyURLsMap(obj.nominated)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
obj.errExitN = make(chan struct{})
|
||||
defer close(obj.errExitN) // multi-signal for errChan close op
|
||||
// blocks until server exits
|
||||
serverErr = obj.runServer(newCluster, nominated)
|
||||
// in case this exits on its own instead of with destroy
|
||||
defer obj.destroyServer() // run to reset some values
|
||||
if sendError && serverErr != nil { // exited with an error
|
||||
select {
|
||||
case obj.errChan <- errwrap.Wrapf(serverErr, "runServer errored"):
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// block until either server is ready or an early exit occurs
|
||||
select {
|
||||
case <-serverReady:
|
||||
// detach from our local return of errors from an early
|
||||
// server exit (pre server ready) and switch to channel
|
||||
sendError = true // gets set before the ackReady() does
|
||||
ackReady() // must be called
|
||||
ackExited() // must be called
|
||||
// pass
|
||||
|
||||
case <-serverExited:
|
||||
ackExited() // must be called
|
||||
ackReady() // must be called
|
||||
|
||||
wg.Wait() // wait for server to finish to get early err
|
||||
return serverErr
|
||||
}
|
||||
|
||||
// Once the server is online, we *must* publish this information
|
||||
// so that (1) others know where to connect to us (2) we provide
|
||||
// an "event" for member add since there is not any event that's
|
||||
// currently built-in to etcd and (3) so we have a key to expire
|
||||
// when we shutdown or crash to give us the member remove event.
|
||||
// please see issue: https://github.com/coreos/etcd/issues/5277
|
||||
|
||||
} else if obj.serverAction(serverActionStop) { // stop?
|
||||
// server is running, but it should not be
|
||||
|
||||
// i have been un-nominated, remove self and shutdown server!
|
||||
// we don't need to do a member remove if i'm the last one...
|
||||
if len(obj.nominated) != 0 { // don't call if nobody left but me!
|
||||
// work around: https://github.com/coreos/etcd/issues/5482
|
||||
// and it might make sense to avoid it if we're the last
|
||||
obj.Logf("member remove: removing self: %d", obj.serverID)
|
||||
resp, err := obj.memberRemove(ctx, obj.serverID)
|
||||
if err != nil {
|
||||
if obj.Debug {
|
||||
obj.Logf("error with member remove: %v", err)
|
||||
}
|
||||
return errwrap.Wrapf(err, "member remove error")
|
||||
}
|
||||
if resp != nil {
|
||||
obj.Logf("member removed (self): %s (%d)", obj.Hostname, obj.serverID)
|
||||
if err := obj.updateMemberState(resp.Members); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: if we fail on destroy should we try to run some of the
|
||||
// other cleanup tasks that usually afterwards (below) anyways ?
|
||||
if err := obj.destroyServer(); err != nil { // sync until exited
|
||||
return errwrap.Wrapf(err, "destroyServer errored")
|
||||
}
|
||||
|
||||
// We close with this special sentinel only during destroy/exit.
|
||||
if obj.closing {
|
||||
return interfaces.ErrShutdown
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// volunteerCb runs to respond to the volunteer list change events.
|
||||
// Functionally, it controls the nominating and adding of members. It typically
|
||||
// nominates a peer so that it knows it will get to be a server, which causes it
|
||||
// to start up its server. It also runs the member add operation so that the
|
||||
// cluster gets quorum safely. The member remove operation is typically run in
|
||||
// the nominateCb of that server when it is asked to shutdown. This occurs when
|
||||
// the nominate entry for that server is removed. If a server removes its
|
||||
// volunteer entry we must respond by removing the nomination so that it can
|
||||
// receive that message and shutdown.
|
||||
// FIXME: we might need to respond to member change/disconnect/shutdown events,
|
||||
// see: https://github.com/coreos/etcd/issues/5277
|
||||
// XXX: Don't allow this function to partially run if it is canceled part way
|
||||
// through... We don't want an inconsistent state where we did unnominate, but
|
||||
// didn't remove a member...
|
||||
// XXX: If the leader changes, do we need to kick the volunteerCb or anything
|
||||
// else that might have required a leader and which returned because it did not
|
||||
// have one, thus losing an event?
|
||||
func (obj *EmbdEtcd) volunteerCb(ctx context.Context) error {
|
||||
// Ensure that only one copy of this function is run simultaneously.
|
||||
// It's not entirely clear if this can ever happen or if it's needed,
|
||||
// but it's an inexpensive safety check that we can add in for now.
|
||||
obj.volunteerMutex.Lock()
|
||||
defer obj.volunteerMutex.Unlock()
|
||||
// This ordering mutex is being added for safety, since there is no good
|
||||
// reason for this function and nominateCb to run simultaneously, and it
|
||||
// might be preventing a race condition that was happening.
|
||||
obj.orderingMutex.Lock()
|
||||
defer obj.orderingMutex.Unlock()
|
||||
if obj.Debug {
|
||||
obj.Logf("volunteerCb")
|
||||
defer obj.Logf("volunteerCb: done!")
|
||||
}
|
||||
|
||||
// FIXME: are there any situations where we don't want to short circuit
|
||||
// here, such as if i'm the last node?
|
||||
if obj.server == nil {
|
||||
if obj.Debug {
|
||||
obj.Logf("i'm not a server yet...")
|
||||
}
|
||||
return nil // if i'm not a server, i'm not a leader, return
|
||||
}
|
||||
|
||||
// FIXME: Instead of checking this, assume yes, and use the
|
||||
// `WithRequireLeader` wrapper, and just ignore the error from that if
|
||||
// it's wrong... Combined with events that poke this volunteerCb when
|
||||
// the leader changes, we shouldn't miss any events...
|
||||
if isLeader, err := obj.isLeader(ctx); err != nil { // XXX: race!
|
||||
return errwrap.Wrapf(err, "error determining leader")
|
||||
} else if !isLeader {
|
||||
if obj.Debug {
|
||||
obj.Logf("we are not the leader...")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// i am the leader!
|
||||
|
||||
// Remember that the member* operations return the membership, so this
|
||||
// means we don't need to run an extra memberList in those scenarios...
|
||||
// However, this can get out of sync easily, so ensure that our member
|
||||
// information is very recent.
|
||||
if err := obj.memberStateFromList(ctx); err != nil {
|
||||
return errwrap.Wrapf(err, "error during state sync")
|
||||
}
|
||||
// XXX: If we have any unstarted members here, do we want to reschedule
|
||||
// this volunteerCb in a moment? Or will we get another event anyways?
|
||||
|
||||
// NOTE: There used to be an is_leader check right here...
|
||||
// FIXME: Should we use WithRequireLeader instead? Here? Elsewhere?
|
||||
// https://godoc.org/github.com/coreos/etcd/clientv3#WithRequireLeader
|
||||
|
||||
// FIXME: can this happen, and if so, is it an error or a pass-through?
|
||||
if len(obj.volunteers) == 0 {
|
||||
obj.Logf("list of volunteers is empty")
|
||||
//return fmt.Errorf("volunteer list is empty")
|
||||
} else {
|
||||
obj.Logf("volunteers: %+v", obj.volunteers)
|
||||
}
|
||||
|
||||
// TODO: do we really need to check these errors?
|
||||
m, err := copyURLsMap(obj.membermap) // list of members...
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
v, err := copyURLsMap(obj.volunteers)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Unnominate anyone that unvolunteers, so they can shutdown cleanly...
|
||||
// FIXME: one step at a time... do we trigger subsequent steps somehow?
|
||||
obj.Logf("chooser: (%+v)/(%+v)", m, v)
|
||||
nominate, unnominate, err := obj.Chooser.Choose(m, v)
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "chooser error")
|
||||
}
|
||||
|
||||
// Ensure that we are the *last* in the list if we're unnominating, and
|
||||
// the *first* in the list if we're nominating. This way, we self-remove
|
||||
// last, and we self-add first. This is least likely to hurt quorum.
|
||||
headFn := func(x string) bool {
|
||||
return x != obj.Hostname
|
||||
}
|
||||
tailFn := func(x string) bool {
|
||||
return x == obj.Hostname
|
||||
}
|
||||
nominate = util.PriorityStrSliceSort(nominate, headFn)
|
||||
unnominate = util.PriorityStrSliceSort(unnominate, tailFn)
|
||||
obj.Logf("chooser result(+/-): %+v/%+v", nominate, unnominate)
|
||||
var reterr error
|
||||
leaderCtx := ctx // default ctx to use
|
||||
if RequireLeaderCtx {
|
||||
leaderCtx = etcd.WithRequireLeader(ctx) // FIXME: Is this correct?
|
||||
}
|
||||
|
||||
for i := range nominate {
|
||||
member := nominate[i]
|
||||
peerURLs, exists := obj.volunteers[member] // comma separated list of urls
|
||||
if !exists {
|
||||
// if this happens, do we have an update race?
|
||||
return fmt.Errorf("could not find member `%s` in volunteers map", member)
|
||||
}
|
||||
|
||||
// NOTE: storing peerURLs when they're already in volunteers/ is
|
||||
// redundant, but it seems to be necessary for a sane algorithm.
|
||||
// nominate before we call the API so that members see it first!
|
||||
if err := obj.nominate(leaderCtx, member, peerURLs); err != nil {
|
||||
return errwrap.Wrapf(err, "error nominating: %s", member)
|
||||
}
|
||||
// XXX: can we add a ttl here, because once we nominate someone,
|
||||
// we need to give them up to N seconds to start up after we run
|
||||
// the MemberAdd API because if they don't, in some situations
|
||||
// such as if we're adding the second node to the cluster, then
|
||||
// we've lost quorum until a second member joins! If the TTL
|
||||
// expires, we need to MemberRemove! In this special case, we
|
||||
// need to forcefully remove the second member if we don't add
|
||||
// them, because we'll be in a lack of quorum state and unable
|
||||
// to do anything... As a result, we should always only add ONE
|
||||
// member at a time!
|
||||
|
||||
// XXX: After we memberAdd, can we wait a timeout, and then undo
|
||||
// the add if the member doesn't come up? We'd also need to run
|
||||
// an unnominate too, and mark the node as temporarily failed...
|
||||
obj.Logf("member add: %s: %v", member, peerURLs)
|
||||
resp, err := obj.memberAdd(leaderCtx, peerURLs)
|
||||
if err != nil {
|
||||
// FIXME: On on error this function needs to run again,
|
||||
// because we need to make sure to add the member here!
|
||||
return errwrap.Wrapf(err, "member add error")
|
||||
}
|
||||
if resp != nil { // if we're already the right state, we get nil
|
||||
obj.Logf("member added: %s (%d): %v", member, resp.Member.ID, peerURLs)
|
||||
if err := obj.updateMemberState(resp.Members); err != nil {
|
||||
return err
|
||||
}
|
||||
if resp.Member.Name == "" { // not started instantly ;)
|
||||
obj.addMemberState(member, resp.Member.ID, peerURLs, nil)
|
||||
}
|
||||
// TODO: would this ever happen or be necessary?
|
||||
//if member == obj.Hostname {
|
||||
// obj.addSelfState()
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
// we must remove them from the members API or it will look like a crash
|
||||
if l := len(unnominate); l > 0 {
|
||||
obj.Logf("unnominated: shutting down %d members...", l)
|
||||
}
|
||||
for i := range unnominate {
|
||||
member := unnominate[i]
|
||||
memberID, exists := obj.memberIDs[member] // map[string]uint64
|
||||
if !exists {
|
||||
// if this happens, do we have an update race?
|
||||
return fmt.Errorf("could not find member `%s` in memberIDs map", member)
|
||||
}
|
||||
|
||||
// start a watcher to know if member was added
|
||||
cancelCtx, cancel := context.WithCancel(leaderCtx)
|
||||
defer cancel()
|
||||
timeout := util.CloseAfter(cancelCtx, SelfRemoveTimeout) // chan closes
|
||||
fn := func(members []*pb.Member) error {
|
||||
for _, m := range members {
|
||||
if m.Name == member || m.ID == memberID {
|
||||
return fmt.Errorf("still present")
|
||||
}
|
||||
}
|
||||
|
||||
return nil // not found!
|
||||
}
|
||||
ch, err := obj.memberChange(cancelCtx, fn, MemberChangeInterval)
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "error watching for change of: %s", member)
|
||||
}
|
||||
if err := obj.nominate(leaderCtx, member, nil); err != nil { // unnominate
|
||||
return errwrap.Wrapf(err, "error unnominating: %s", member)
|
||||
}
|
||||
// Once we issue the above unnominate, that peer will
|
||||
// shutdown, and this might cause us to loose quorum,
|
||||
// therefore, let that member remove itself, and then
|
||||
// double check that it did happen in case delinquent.
|
||||
// TODO: get built-in transactional member Add/Remove
|
||||
// functionality to avoid a separate nominate list...
|
||||
|
||||
// If we're removing ourself, then let the (un)nominate callback
|
||||
// do it. That way it removes itself cleanly on server shutdown.
|
||||
if member == obj.Hostname { // remove in unnominate!
|
||||
cancel()
|
||||
obj.Logf("unnominate: removing self...")
|
||||
continue
|
||||
}
|
||||
|
||||
// cancel remove sleep and unblock early on event...
|
||||
obj.Logf("waiting %s for %s to self remove...", SelfRemoveTimeout.String(), member)
|
||||
select {
|
||||
case <-timeout:
|
||||
// pass
|
||||
case err, ok := <-ch:
|
||||
if ok {
|
||||
select {
|
||||
case <-timeout:
|
||||
// wait until timeout finishes
|
||||
}
|
||||
reterr = errwrap.Append(reterr, err)
|
||||
}
|
||||
// removed quickly!
|
||||
}
|
||||
cancel()
|
||||
|
||||
// In case the removed member doesn't remove itself, do it!
|
||||
resp, err := obj.memberRemove(leaderCtx, memberID)
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "member remove error")
|
||||
}
|
||||
if resp != nil {
|
||||
obj.Logf("member removed (forced): %s (%d)", member, memberID)
|
||||
if err := obj.updateMemberState(resp.Members); err != nil {
|
||||
return err
|
||||
}
|
||||
// Do this I guess, but the TTL will eventually get it.
|
||||
// Remove the other member to avoid client connections.
|
||||
if err := obj.advertise(leaderCtx, member, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the member from our lists to avoid blocking future
|
||||
// possible MemberList calls which would try and connect to a
|
||||
// missing member... The lists should get updated from the
|
||||
// member exiting safely if it doesn't crash, but if it did
|
||||
// and/or since it's a race to see if the update event will get
|
||||
// seen before we need the new data, just do it now anyways.
|
||||
// TODO: Is the above comment still true?
|
||||
obj.rmMemberState(member) // proactively delete it
|
||||
|
||||
obj.Logf("member %s (%d) removed successfully!", member, memberID)
|
||||
}
|
||||
|
||||
// NOTE: We could ensure that etcd reconnects here, but we can just wait
|
||||
// for the endpoints callback which should see the state change instead.
|
||||
|
||||
obj.setEndpoints() // sync client with new endpoints
|
||||
return reterr
|
||||
}
|
||||
98
etcd/chooser/chooser.go
Normal file
98
etcd/chooser/chooser.go
Normal file
@@ -0,0 +1,98 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package chooser
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types"
|
||||
)
|
||||
|
||||
// Data is the bundle of inputs that gets handed to a chooser when it is
// initialized.
type Data struct {
	// Hostname is the hostname running this chooser instance. It can be
	// used as a unique key in the cluster.
	Hostname string // ourself

	// Debug is a flag that implementations can check before emitting
	// extra log output.
	Debug bool

	// Logf is the printf-style logging function that implementations
	// should log through.
	Logf func(format string, v ...interface{})
}
|
||||
|
||||
// Chooser represents the interface you must implement if you want to be able to
|
||||
// control which cluster members are added and removed. Remember that this can
|
||||
// get run from any peer (server) machine in the cluster, and that this may
|
||||
// change as different leaders are elected! Do not assume any state will remain
|
||||
// between invocations. If you want to maintain hysteresis or state, make sure
|
||||
// to synchronize it in etcd.
|
||||
type Chooser interface {
|
||||
// Validate validates the chooser implementation to ensure the params
|
||||
// represent a valid instantiation.
|
||||
Validate() error
|
||||
|
||||
// Init initializes the chooser and passes in some useful data and
|
||||
// handles.
|
||||
Init(*Data) error
|
||||
|
||||
// Connect will be called with a client interfaces.Client that you can
|
||||
// use if necessary to store some shared state between instances of this
|
||||
// and watch for external changes. Sharing state between members should
|
||||
// be avoided if possible, and there is no guarantee that your data
|
||||
// won't be deleted in a disaster. There are no backups for this,
|
||||
// regenerate anything you might need. Additionally, this may only be
|
||||
// used inside the Chooser method, since Connect is only called after
|
||||
// Init. This is however very useful for implementing special choosers.
|
||||
// Since some operations can run on connect, it gets a context. If you
|
||||
// cancel this context, then you might expect that Watch could die too.
|
||||
// Both of these should get cancelled if you call Disconnect.
|
||||
Connect(context.Context, interfaces.Client) error // we get given a namespaced client
|
||||
|
||||
// Disconnect tells us to cancel our use of the client interface that we
|
||||
// got from the Connect method. We must not return until we're done.
|
||||
Disconnect() error
|
||||
|
||||
// Watch is called by the engine to allow us to Watch for changes that
|
||||
// might cause us to want to re-evaluate our nomination decision. It
|
||||
// should error if it cannot startup. Once it is running, it should send
|
||||
// a nil error on every event, and an error if things go wrong. When
|
||||
// Disconnect is shutdown, then that should cause this to exit. When
|
||||
// this sends events, Choose will usually eventually get called in
|
||||
// response.
|
||||
Watch() (chan error, error)
|
||||
|
||||
// Choose takes the current peer membership state, and the available
|
||||
// volunteers, and produces a list of who we should add and who should
|
||||
// quit. In general, it's best to only remove one member at a time, in
|
||||
// particular because this will get called iteratively on future events,
|
||||
// and it can remove subsequent members on the next iteration. One
|
||||
// important note: when building a new cluster, we do assume that out of
|
||||
// one available volunteer, and no members, that this first volunteer is
|
||||
// selected. Make sure that any implementations of this function do this
|
||||
// as well, since otherwise the hardcoded initial assumption would be
|
||||
// proven wrong here!
|
||||
// TODO: we could pass in two lists of hostnames instead of the full
|
||||
// URLsMap here, but let's keep it more complicated now in case, and
|
||||
// reduce it down later if needed...
|
||||
// TODO: should we add a step arg here ?
|
||||
Choose(membership, volunteers etcdtypes.URLsMap) (nominees, quitters []string, err error)
|
||||
|
||||
// Close runs some cleanup routines in case there is anything that you'd
|
||||
// like to free after we're done.
|
||||
Close() error
|
||||
}
|
||||
285
etcd/chooser/dynamicsize.go
Normal file
285
etcd/chooser/dynamicsize.go
Normal file
@@ -0,0 +1,285 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package chooser
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types"
|
||||
)
|
||||
|
||||
// XXX: Test causing cluster shutdowns with:
|
||||
// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 0
|
||||
// It is currently broken.
|
||||
|
||||
const (
	// DefaultIdealDynamicSize is the target ideal dynamic cluster size
	// that the initial cluster uses by default.
	DefaultIdealDynamicSize = 5

	// IdealDynamicSizePath is the key path used by the chooser. It is
	// usually used together with a namespace prefix.
	IdealDynamicSizePath = "/dynamicsize/idealclustersize"
)
|
||||
|
||||
// DynamicSize is a simple implementation of the Chooser interface. This helps
|
||||
// select which machines to add and remove as we elastically grow and shrink our
|
||||
// cluster.
|
||||
// TODO: think of a better name
|
||||
type DynamicSize struct {
|
||||
// IdealClusterSize is the ideal target size for this cluster. If it is
|
||||
// set to zero, then it will use DefaultIdealDynamicSize as the value.
|
||||
IdealClusterSize uint16
|
||||
|
||||
data *Data // save for later
|
||||
client interfaces.Client
|
||||
|
||||
ctx context.Context
|
||||
cancel func()
|
||||
wg *sync.WaitGroup
|
||||
}
|
||||
|
||||
// Validate validates the struct.
|
||||
func (obj *DynamicSize) Validate() error {
|
||||
// TODO: if changed to zero, treat as a cluster shutdown signal
|
||||
if obj.IdealClusterSize < 0 {
|
||||
return fmt.Errorf("must choose a positive IdealClusterSize value")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Init accepts some useful data and handles.
|
||||
func (obj *DynamicSize) Init(data *Data) error {
|
||||
if data.Hostname == "" {
|
||||
return fmt.Errorf("can't Init with empty Hostname value")
|
||||
}
|
||||
if data.Logf == nil {
|
||||
return fmt.Errorf("no Logf function was specified")
|
||||
}
|
||||
|
||||
if obj.IdealClusterSize == 0 {
|
||||
obj.IdealClusterSize = DefaultIdealDynamicSize
|
||||
}
|
||||
|
||||
obj.data = data
|
||||
obj.wg = &sync.WaitGroup{}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close runs some cleanup routines.
|
||||
func (obj *DynamicSize) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Connect is called to accept an etcd.KV namespace that we can use.
|
||||
func (obj *DynamicSize) Connect(ctx context.Context, client interfaces.Client) error {
|
||||
obj.client = client
|
||||
obj.ctx, obj.cancel = context.WithCancel(ctx)
|
||||
size, err := DynamicSizeGet(obj.ctx, obj.client)
|
||||
if err == interfaces.ErrNotExist || (err == nil && size <= 0) {
|
||||
// unset, set in running cluster
|
||||
changed, err := DynamicSizeSet(obj.ctx, obj.client, obj.IdealClusterSize)
|
||||
if err == nil && changed {
|
||||
obj.data.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
|
||||
}
|
||||
return err
|
||||
} else if err == nil && size >= 1 {
|
||||
// unset, get from running cluster (use the valid cluster value)
|
||||
if obj.IdealClusterSize != size {
|
||||
obj.data.Logf("using dynamic cluster size of: %d", size)
|
||||
}
|
||||
obj.IdealClusterSize = size // get from exiting cluster...
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Disconnect is called to cancel our use of the etcd.KV connection.
|
||||
func (obj *DynamicSize) Disconnect() error {
|
||||
if obj.client != nil { // if connect was not called, don't call this...
|
||||
obj.cancel()
|
||||
}
|
||||
obj.wg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Watch is called to send events anytime we might want to change membership. It
|
||||
// is also used to watch for changes so that when we get an event, we know to
|
||||
// honour the change in Choose.
|
||||
func (obj *DynamicSize) Watch() (chan error, error) {
|
||||
// NOTE: The body of this function is very similar to the logic in the
|
||||
// simple client.Watcher implementation that wraps ComplexWatcher.
|
||||
path := IdealDynamicSizePath
|
||||
cancelCtx, cancel := context.WithCancel(obj.ctx)
|
||||
info, err := obj.client.ComplexWatcher(cancelCtx, path)
|
||||
if err != nil {
|
||||
defer cancel()
|
||||
return nil, err
|
||||
}
|
||||
ch := make(chan error)
|
||||
obj.wg.Add(1) // hook in to global wait group
|
||||
go func() {
|
||||
defer obj.wg.Done()
|
||||
defer close(ch)
|
||||
defer cancel()
|
||||
var data *interfaces.WatcherData
|
||||
var ok bool
|
||||
for {
|
||||
select {
|
||||
case data, ok = <-info.Events: // read
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
case <-cancelCtx.Done():
|
||||
continue // wait for ch closure, but don't block
|
||||
}
|
||||
|
||||
size := obj.IdealClusterSize
|
||||
for _, event := range data.Events { // apply each event
|
||||
if event.Type != etcd.EventTypePut {
|
||||
continue
|
||||
}
|
||||
key := string(event.Kv.Key)
|
||||
key = key[len(data.Path):] // remove path prefix
|
||||
val := string(event.Kv.Value)
|
||||
if val == "" {
|
||||
continue // ignore empty values
|
||||
}
|
||||
i, err := strconv.Atoi(val)
|
||||
if err != nil {
|
||||
continue // ignore bad values
|
||||
}
|
||||
size = uint16(i) // save
|
||||
}
|
||||
if size == obj.IdealClusterSize {
|
||||
continue // no change
|
||||
}
|
||||
// set before sending the signal
|
||||
obj.IdealClusterSize = size
|
||||
|
||||
if size == 0 { // zero means shutdown
|
||||
obj.data.Logf("impending cluster shutdown...")
|
||||
} else {
|
||||
obj.data.Logf("got new dynamic cluster size of: %d", size)
|
||||
}
|
||||
|
||||
select {
|
||||
case ch <- data.Err: // send (might be nil!)
|
||||
case <-cancelCtx.Done():
|
||||
continue // wait for ch closure, but don't block
|
||||
}
|
||||
}
|
||||
}()
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// Choose accepts a list of current membership, and a list of volunteers. From
|
||||
// that we can decide who we should add and remove. We return a list of those
|
||||
// nominated, and unnominated users respectively.
|
||||
func (obj *DynamicSize) Choose(membership, volunteers etcdtypes.URLsMap) ([]string, []string, error) {
|
||||
// Possible nominees include anyone that has volunteered, but that
|
||||
// isn't a member.
|
||||
if obj.data.Debug {
|
||||
obj.data.Logf("goal: %d members", obj.IdealClusterSize)
|
||||
}
|
||||
nominees := []string{}
|
||||
for hostname := range volunteers {
|
||||
if _, exists := membership[hostname]; !exists {
|
||||
nominees = append(nominees, hostname)
|
||||
}
|
||||
}
|
||||
|
||||
// Possible quitters include anyone that is a member, but that is not a
|
||||
// volunteer. (They must have unvolunteered.)
|
||||
quitters := []string{}
|
||||
for hostname := range membership {
|
||||
if _, exists := volunteers[hostname]; !exists {
|
||||
quitters = append(quitters, hostname)
|
||||
}
|
||||
}
|
||||
|
||||
// What we want to know...
|
||||
nominated := []string{}
|
||||
unnominated := []string{}
|
||||
|
||||
// We should always only add ONE member at a time!
|
||||
// TODO: is it okay to remove multiple members at the same time?
|
||||
if len(nominees) > 0 && len(membership)-len(quitters) < int(obj.IdealClusterSize) {
|
||||
//unnominated = []string{} // only do one operation at a time
|
||||
nominated = []string{nominees[0]} // FIXME: use a better picker algorithm
|
||||
|
||||
} else if len(quitters) == 0 && len(membership) > int(obj.IdealClusterSize) { // too many members
|
||||
//nominated = []string{} // only do one operation at a time
|
||||
for kicked := range membership {
|
||||
// don't kick ourself unless we are the only one left...
|
||||
if kicked != obj.data.Hostname || (obj.IdealClusterSize == 0 && len(membership) == 1) {
|
||||
unnominated = []string{kicked} // FIXME: use a better picker algorithm
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if len(quitters) > 0 { // must do these before new unvolunteers
|
||||
unnominated = quitters // get rid of the quitters
|
||||
}
|
||||
|
||||
return nominated, unnominated, nil // perform these changes
|
||||
}
|
||||
|
||||
// DynamicSizeGet gets the currently set dynamic size set in the cluster.
|
||||
func DynamicSizeGet(ctx context.Context, client interfaces.Client) (uint16, error) {
|
||||
key := IdealDynamicSizePath
|
||||
m, err := client.Get(ctx, key) // (map[string]string, error)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
val, exists := m[IdealDynamicSizePath]
|
||||
if !exists {
|
||||
return 0, interfaces.ErrNotExist
|
||||
}
|
||||
i, err := strconv.Atoi(val)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("bad value")
|
||||
}
|
||||
return uint16(i), nil
|
||||
}
|
||||
|
||||
// DynamicSizeSet sets the dynamic size in the cluster. It returns true if it
|
||||
// changed or set the value.
|
||||
func DynamicSizeSet(ctx context.Context, client interfaces.Client, size uint16) (bool, error) {
|
||||
key := IdealDynamicSizePath
|
||||
val := strconv.FormatUint(uint64(size), 10) // fmt.Sprintf("%d", size)
|
||||
|
||||
ifCmps := []etcd.Cmp{
|
||||
etcd.Compare(etcd.Value(key), "=", val), // desired state
|
||||
}
|
||||
elseOps := []etcd.Op{etcd.OpPut(key, val)}
|
||||
|
||||
resp, err := client.Txn(ctx, ifCmps, nil, elseOps)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
// succeeded is set to true if the compare evaluated to true
|
||||
changed := !resp.Succeeded
|
||||
|
||||
return changed, err
|
||||
}
|
||||
@@ -1,95 +0,0 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
|
||||
context "golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// ClientEtcd provides a simple etcd client for deploy and status operations.
|
||||
type ClientEtcd struct {
|
||||
Seeds []string // list of endpoints to try to connect
|
||||
|
||||
client *etcd.Client
|
||||
}
|
||||
|
||||
// GetClient returns a handle to the raw etcd client object.
|
||||
func (obj *ClientEtcd) GetClient() *etcd.Client {
|
||||
return obj.client
|
||||
}
|
||||
|
||||
// GetConfig returns the config struct to be used for the etcd client connect.
|
||||
func (obj *ClientEtcd) GetConfig() etcd.Config {
|
||||
cfg := etcd.Config{
|
||||
Endpoints: obj.Seeds,
|
||||
// RetryDialer chooses the next endpoint to use
|
||||
// it comes with a default dialer if unspecified
|
||||
DialTimeout: 5 * time.Second,
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
// Connect connects the client to a server, and then builds the *API structs.
|
||||
// If reconnect is true, it will force a reconnect with new config endpoints.
|
||||
func (obj *ClientEtcd) Connect() error {
|
||||
if obj.client != nil { // memoize
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
cfg := obj.GetConfig()
|
||||
obj.client, err = etcd.New(cfg) // connect!
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "client connect error")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Destroy cleans up the entire etcd client connection.
|
||||
func (obj *ClientEtcd) Destroy() error {
|
||||
err := obj.client.Close()
|
||||
//obj.wg.Wait()
|
||||
return err
|
||||
}
|
||||
|
||||
// Get runs a get on the client connection. This has the same signature as our
|
||||
// EmbdEtcd Get function.
|
||||
func (obj *ClientEtcd) Get(path string, opts ...etcd.OpOption) (map[string]string, error) {
|
||||
resp, err := obj.client.Get(context.TODO(), path, opts...)
|
||||
if err != nil || resp == nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
|
||||
result := make(map[string]string)
|
||||
for _, x := range resp.Kvs {
|
||||
result[string(x.Key)] = string(x.Value)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Txn runs a transaction on the client connection. This has the same signature
|
||||
// as our EmbdEtcd Txn function.
|
||||
func (obj *ClientEtcd) Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) {
|
||||
return obj.client.KV.Txn(context.TODO()).If(ifcmps...).Then(thenops...).Else(elseops...).Commit()
|
||||
}
|
||||
@@ -15,60 +15,43 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
package resources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"github.com/purpleidea/mgmt/engine"
|
||||
engineUtil "github.com/purpleidea/mgmt/engine/util"
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
)
|
||||
|
||||
const (
|
||||
ns = "" // in case we want to add one back in
|
||||
)
|
||||
|
||||
// WatchResources returns a channel that outputs events when exported resources
|
||||
// change.
|
||||
// TODO: Filter our watch (on the server side if possible) based on the
|
||||
// collection prefixes and filters that we care about...
|
||||
func WatchResources(obj *EmbdEtcd) chan error {
|
||||
ch := make(chan error, 1) // buffer it so we can measure it
|
||||
path := fmt.Sprintf("%s/exported/", NS)
|
||||
callback := func(re *RE) error {
|
||||
// TODO: is this even needed? it used to happen on conn errors
|
||||
log.Printf("Etcd: Watch: Path: %v", path) // event
|
||||
if re == nil || re.response.Canceled {
|
||||
return fmt.Errorf("watch is empty") // will cause a CtxError+retry
|
||||
}
|
||||
// we normally need to check if anything changed since the last
|
||||
// event, since a set (export) with no changes still causes the
|
||||
// watcher to trigger and this would cause an infinite loop. we
|
||||
// don't need to do this check anymore because we do the export
|
||||
// transactionally, and only if a change is needed. since it is
|
||||
// atomic, all the changes arrive together which avoids dupes!!
|
||||
if len(ch) == 0 { // send event only if one isn't pending
|
||||
// this check avoids multiple events all queueing up and then
|
||||
// being released continuously long after the changes stopped
|
||||
// do not block!
|
||||
ch <- nil // event
|
||||
}
|
||||
return nil
|
||||
}
|
||||
_, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors
|
||||
return ch
|
||||
func WatchResources(ctx context.Context, client interfaces.Client) (chan error, error) {
|
||||
path := fmt.Sprintf("%s/exported/", ns)
|
||||
return client.Watcher(ctx, path, etcd.WithPrefix())
|
||||
}
|
||||
|
||||
// SetResources exports all of the resources which we pass in to etcd.
|
||||
func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) error {
|
||||
func SetResources(ctx context.Context, client interfaces.Client, hostname string, resourceList []engine.Res) error {
|
||||
// key structure is $NS/exported/$hostname/resources/$uid = $data
|
||||
|
||||
var kindFilter []string // empty to get from everyone
|
||||
hostnameFilter := []string{hostname}
|
||||
// this is not a race because we should only be reading keys which we
|
||||
// set, and there should not be any contention with other hosts here!
|
||||
originals, err := GetResources(obj, hostnameFilter, kindFilter)
|
||||
originals, err := GetResources(ctx, client, hostnameFilter, kindFilter)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -81,10 +64,10 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err
|
||||
ops := []etcd.Op{} // list of ops in this transaction
|
||||
for _, res := range resourceList {
|
||||
if res.Kind() == "" {
|
||||
log.Fatalf("Etcd: SetResources: Error: Empty kind: %v", res.Name())
|
||||
return fmt.Errorf("empty kind: %s", res.Name())
|
||||
}
|
||||
uid := fmt.Sprintf("%s/%s", res.Kind(), res.Name())
|
||||
path := fmt.Sprintf("%s/exported/%s/resources/%s", NS, hostname, uid)
|
||||
path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid)
|
||||
if data, err := engineUtil.ResToB64(res); err == nil {
|
||||
ifs = append(ifs, etcd.Compare(etcd.Value(path), "=", data)) // desired state
|
||||
ops = append(ops, etcd.OpPut(path, data))
|
||||
@@ -106,10 +89,10 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err
|
||||
// delete old, now unused resources here...
|
||||
for _, res := range originals {
|
||||
if res.Kind() == "" {
|
||||
log.Fatalf("Etcd: SetResources: Error: Empty kind: %v", res.Name())
|
||||
return fmt.Errorf("empty kind: %s", res.Name())
|
||||
}
|
||||
uid := fmt.Sprintf("%s/%s", res.Kind(), res.Name())
|
||||
path := fmt.Sprintf("%s/exported/%s/resources/%s", NS, hostname, uid)
|
||||
path := fmt.Sprintf("%s/exported/%s/resources/%s", ns, hostname, uid)
|
||||
|
||||
if match(res, resourceList) { // if we match, no need to delete!
|
||||
continue
|
||||
@@ -124,9 +107,9 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err
|
||||
// it's important to do this in one transaction, and atomically, because
|
||||
// this way, we only generate one watch event, and only when it's needed
|
||||
if hasDeletes { // always run, ifs don't matter
|
||||
_, err = obj.Txn(nil, ops, nil) // TODO: does this run? it should!
|
||||
_, err = client.Txn(ctx, nil, ops, nil) // TODO: does this run? it should!
|
||||
} else {
|
||||
_, err = obj.Txn(ifs, nil, ops) // TODO: do we need to look at response?
|
||||
_, err = client.Txn(ctx, ifs, nil, ops) // TODO: do we need to look at response?
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -136,11 +119,11 @@ func SetResources(obj *EmbdEtcd, hostname string, resourceList []engine.Res) err
|
||||
// TODO: Expand this with a more powerful filter based on what we eventually
|
||||
// support in our collect DSL. Ideally a server side filter like WithFilter()
|
||||
// We could do this if the pattern was $NS/exported/$kind/$hostname/$uid = $data.
|
||||
func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine.Res, error) {
|
||||
func GetResources(ctx context.Context, client interfaces.Client, hostnameFilter, kindFilter []string) ([]engine.Res, error) {
|
||||
// key structure is $NS/exported/$hostname/resources/$uid = $data
|
||||
path := fmt.Sprintf("%s/exported/", NS)
|
||||
path := fmt.Sprintf("%s/exported/", ns)
|
||||
resourceList := []engine.Res{}
|
||||
keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
|
||||
keyMap, err := client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not get resources: %v", err)
|
||||
}
|
||||
@@ -160,7 +143,9 @@ func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine.
|
||||
if kind == "" {
|
||||
return nil, fmt.Errorf("unexpected kind chunk")
|
||||
}
|
||||
|
||||
if name == "" { // TODO: should I check this?
|
||||
return nil, fmt.Errorf("unexpected empty name")
|
||||
}
|
||||
// FIXME: ideally this would be a server side filter instead!
|
||||
if len(hostnameFilter) > 0 && !util.StrInList(hostname, hostnameFilter) {
|
||||
continue
|
||||
@@ -171,9 +156,9 @@ func GetResources(obj *EmbdEtcd, hostnameFilter, kindFilter []string) ([]engine.
|
||||
continue
|
||||
}
|
||||
|
||||
if obj, err := engineUtil.B64ToRes(val); err == nil {
|
||||
log.Printf("Etcd: Get: (Hostname, Kind, Name): (%s, %s, %s)", hostname, kind, name)
|
||||
resourceList = append(resourceList, obj)
|
||||
if res, err := engineUtil.B64ToRes(val); err == nil {
|
||||
//obj.Logf("Get: (Hostname, Kind, Name): (%s, %s, %s)", hostname, kind, name)
|
||||
resourceList = append(resourceList, res)
|
||||
} else {
|
||||
return nil, fmt.Errorf("can't convert from B64: %v", err)
|
||||
}
|
||||
484
etcd/client/simple.go
Normal file
484
etcd/client/simple.go
Normal file
@@ -0,0 +1,484 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package client
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
|
||||
"github.com/coreos/etcd/clientv3/namespace"
|
||||
)
|
||||
|
||||
// method records which constructor was used to build a Simple client. Init and
// Close switch on it to decide what needs to be set up or torn down.
type method uint8

const (
	// methodError marks a client whose constructor failed.
	methodError = method(iota)
	// methodSeeds marks a client that connects to a list of seeds itself.
	methodSeeds
	// methodClient marks a client built around an existing *etcd.Client.
	methodClient
	// methodNamespace marks a client built from ready-made KV and Watcher
	// interfaces.
	methodNamespace
)
|
||||
|
||||
// NewClientFromSeeds builds a new simple client by connecting to a list of
|
||||
// seeds.
|
||||
func NewClientFromSeeds(seeds []string) *Simple {
|
||||
return &Simple{
|
||||
method: methodSeeds,
|
||||
wg: &sync.WaitGroup{},
|
||||
|
||||
seeds: seeds,
|
||||
}
|
||||
}
|
||||
|
||||
// NewClientFromSeedsNamespace builds a new simple client by connecting to a
|
||||
// list of seeds and ensuring all key access is prefixed with a namespace.
|
||||
func NewClientFromSeedsNamespace(seeds []string, ns string) *Simple {
|
||||
return &Simple{
|
||||
method: methodSeeds,
|
||||
wg: &sync.WaitGroup{},
|
||||
|
||||
seeds: seeds,
|
||||
namespace: ns,
|
||||
}
|
||||
}
|
||||
|
||||
// NewClientFromClient builds a new simple client by taking an existing client
|
||||
// struct. It does not disconnect this when Close is called, as that is up to
|
||||
// the parent, which is the owner of that client input struct.
|
||||
func NewClientFromClient(client *etcd.Client) *Simple {
|
||||
return &Simple{
|
||||
method: methodClient,
|
||||
wg: &sync.WaitGroup{},
|
||||
|
||||
client: client,
|
||||
}
|
||||
}
|
||||
|
||||
// NewClientFromNamespaceStr builds a new simple client by taking an existing
|
||||
// client and a string namespace. Warning, this doesn't properly nest the
|
||||
// namespaces.
|
||||
func NewClientFromNamespaceStr(client *etcd.Client, ns string) *Simple {
|
||||
if client == nil {
|
||||
return &Simple{
|
||||
method: methodError,
|
||||
err: fmt.Errorf("client is nil"),
|
||||
}
|
||||
}
|
||||
kv := client.KV
|
||||
w := client.Watcher
|
||||
if ns != "" { // only layer if not empty
|
||||
kv = namespace.NewKV(client.KV, ns)
|
||||
w = namespace.NewWatcher(client.Watcher, ns)
|
||||
}
|
||||
|
||||
return &Simple{
|
||||
method: methodClient, // similar enough to this one to share it!
|
||||
wg: &sync.WaitGroup{},
|
||||
|
||||
client: client, // store for GetClient()
|
||||
kv: kv,
|
||||
w: w,
|
||||
}
|
||||
}
|
||||
|
||||
// NewClientFromSimple builds a simple client from an existing client interface
|
||||
// which must be a simple client. This awkward method is required so that
|
||||
// namespace nesting works properly, because the *etcd.Client doesn't directly
|
||||
// pass through the namespace. I'd love to nuke this function, but it's good
|
||||
// enough for now.
|
||||
func NewClientFromSimple(client interfaces.Client, ns string) *Simple {
|
||||
if client == nil {
|
||||
return &Simple{
|
||||
method: methodError,
|
||||
err: fmt.Errorf("client is nil"),
|
||||
}
|
||||
}
|
||||
|
||||
simple, ok := client.(*Simple)
|
||||
if !ok {
|
||||
return &Simple{
|
||||
method: methodError,
|
||||
err: fmt.Errorf("client is not simple"),
|
||||
}
|
||||
}
|
||||
kv := simple.kv
|
||||
w := simple.w
|
||||
if ns != "" { // only layer if not empty
|
||||
kv = namespace.NewKV(simple.kv, ns)
|
||||
w = namespace.NewWatcher(simple.w, ns)
|
||||
}
|
||||
|
||||
return &Simple{
|
||||
method: methodNamespace,
|
||||
wg: &sync.WaitGroup{},
|
||||
|
||||
client: client.GetClient(), // store for GetClient()
|
||||
kv: kv,
|
||||
w: w,
|
||||
}
|
||||
}
|
||||
|
||||
// NewClientFromNamespace builds a new simple client by taking an existing set
|
||||
// of interface API's that we might use.
|
||||
func NewClientFromNamespace(client *etcd.Client, kv etcd.KV, w etcd.Watcher) *Simple {
|
||||
return &Simple{
|
||||
method: methodNamespace,
|
||||
wg: &sync.WaitGroup{},
|
||||
|
||||
client: client, // store for GetClient()
|
||||
kv: kv,
|
||||
w: w,
|
||||
}
|
||||
}
|
||||
|
||||
// Simple provides a simple etcd client for deploy and status operations. You
|
||||
// can set Debug and Logf after you've built this with one of the NewClient*
|
||||
// methods. Run Init before using it, and Close when you're done with it.
|
||||
type Simple struct {
|
||||
// Debug turns on the extra log messages in Set, Txn and ComplexWatcher.
Debug bool
|
||||
// Logf is the optional logger. It's safe to leave this nil, since it's
// only ever called through the logf wrapper which checks for that.
Logf func(format string, v ...interface{})
|
||||
|
|
||||
// method records which constructor built this struct. Init and Close
// switch on it to decide what to do.
method method
|
||||
// wg tracks the goroutines started by Watcher and ComplexWatcher. Close
// waits on it before returning.
wg *sync.WaitGroup
|
||||
|
|
||||
// err is the error we set when using methodError
|
||||
err error
|
||||
|
|
||||
// seeds is the list of endpoints to try to connect to.
|
||||
seeds []string
|
||||
// namespace is the optional key prefix that connect layers on top of kv
// and w when using the seeds method. Empty means no prefix.
namespace string
|
||||
|
|
||||
// client is the etcd client connection.
|
||||
client *etcd.Client
|
||||
|
|
||||
// kv and w are the namespaced interfaces that we got passed.
|
||||
kv etcd.KV
|
||||
w etcd.Watcher
|
||||
}
|
||||
|
||||
// logf is a safe wrapper around the Logf parameter that doesn't panic if the
|
||||
// user didn't pass a logger in.
|
||||
func (obj *Simple) logf(format string, v ...interface{}) {
|
||||
if obj.Logf == nil {
|
||||
return
|
||||
}
|
||||
obj.Logf(format, v...)
|
||||
}
|
||||
|
||||
// config returns the config struct to be used for the etcd client connect.
|
||||
func (obj *Simple) config() etcd.Config {
|
||||
cfg := etcd.Config{
|
||||
Endpoints: obj.seeds,
|
||||
// RetryDialer chooses the next endpoint to use
|
||||
// it comes with a default dialer if unspecified
|
||||
DialTimeout: 5 * time.Second,
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
// connect connects the client to a server, and then builds the *API structs.
|
||||
func (obj *Simple) connect() error {
|
||||
if obj.client != nil { // memoize
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
cfg := obj.config()
|
||||
obj.client, err = etcd.New(cfg) // connect!
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "client connect error")
|
||||
}
|
||||
obj.kv = obj.client.KV
|
||||
obj.w = obj.client.Watcher
|
||||
if obj.namespace != "" { // bonus feature of seeds method
|
||||
obj.kv = namespace.NewKV(obj.client.KV, obj.namespace)
|
||||
obj.w = namespace.NewWatcher(obj.client.Watcher, obj.namespace)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Init starts up the struct.
|
||||
func (obj *Simple) Init() error {
|
||||
// By the end of this, we must have obj.kv and obj.w available for use.
|
||||
switch obj.method {
|
||||
case methodError:
|
||||
return obj.err // use the error we set
|
||||
|
||||
case methodSeeds:
|
||||
if len(obj.seeds) <= 0 {
|
||||
return fmt.Errorf("zero seeds")
|
||||
}
|
||||
return obj.connect()
|
||||
|
||||
case methodClient:
|
||||
if obj.client == nil {
|
||||
return fmt.Errorf("no client")
|
||||
}
|
||||
if obj.kv == nil { // overwrite if not specified!
|
||||
obj.kv = obj.client.KV
|
||||
}
|
||||
if obj.w == nil {
|
||||
obj.w = obj.client.Watcher
|
||||
}
|
||||
return nil
|
||||
|
||||
case methodNamespace:
|
||||
if obj.kv == nil || obj.w == nil {
|
||||
return fmt.Errorf("empty namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("unknown method: %+v", obj.method)
|
||||
}
|
||||
|
||||
// Close cleans up the struct after we're finished.
|
||||
func (obj *Simple) Close() error {
|
||||
defer obj.wg.Wait()
|
||||
switch obj.method {
|
||||
case methodError: // for consistency
|
||||
return fmt.Errorf("did not Init")
|
||||
|
||||
case methodSeeds:
|
||||
return obj.client.Close()
|
||||
|
||||
case methodClient:
|
||||
// we we're given a client, so we don't own it or close it
|
||||
return nil
|
||||
|
||||
case methodNamespace:
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("unknown method: %+v", obj.method)
|
||||
}
|
||||
|
||||
// GetClient returns a handle to an open etcd Client. This is needed for certain
|
||||
// upstream API's that don't support passing in KV and Watcher instead. It
// returns whatever client pointer is stored in the struct, which is nil if one
// was never set.
|
||||
func (obj *Simple) GetClient() *etcd.Client {
|
||||
return obj.client
|
||||
}
|
||||
|
||||
// Set runs a set operation. If you'd like more information about whether a
|
||||
// value changed or not, use Txn instead.
|
||||
func (obj *Simple) Set(ctx context.Context, key, value string, opts ...etcd.OpOption) error {
|
||||
// key is the full key path
|
||||
resp, err := obj.kv.Put(ctx, key, value, opts...)
|
||||
if obj.Debug {
|
||||
obj.logf("set(%s): %v", key, resp) // bonus
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Get runs a get operation.
|
||||
func (obj *Simple) Get(ctx context.Context, path string, opts ...etcd.OpOption) (map[string]string, error) {
|
||||
resp, err := obj.kv.Get(ctx, path, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if resp == nil {
|
||||
return nil, fmt.Errorf("empty response")
|
||||
}
|
||||
|
||||
// TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
|
||||
result := make(map[string]string)
|
||||
for _, x := range resp.Kvs {
|
||||
result[string(x.Key)] = string(x.Value)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Del runs a delete operation.
|
||||
func (obj *Simple) Del(ctx context.Context, path string, opts ...etcd.OpOption) (int64, error) {
|
||||
resp, err := obj.kv.Delete(ctx, path, opts...)
|
||||
if err == nil {
|
||||
return resp.Deleted, nil
|
||||
}
|
||||
return -1, err
|
||||
}
|
||||
|
||||
// Txn runs a transaction.
|
||||
func (obj *Simple) Txn(ctx context.Context, ifCmps []etcd.Cmp, thenOps, elseOps []etcd.Op) (*etcd.TxnResponse, error) {
|
||||
resp, err := obj.kv.Txn(ctx).If(ifCmps...).Then(thenOps...).Else(elseOps...).Commit()
|
||||
if obj.Debug {
|
||||
obj.logf("txn: %v", resp) // bonus
|
||||
}
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// Watcher is a watcher that returns a chan of error's instead of a chan with
|
||||
// all sorts of watcher data. This is useful when we only want an event signal,
|
||||
// but we don't care about the specifics.
|
||||
func (obj *Simple) Watcher(ctx context.Context, path string, opts ...etcd.OpOption) (chan error, error) {
|
||||
cancelCtx, cancel := context.WithCancel(ctx)
|
||||
info, err := obj.ComplexWatcher(cancelCtx, path, opts...)
|
||||
if err != nil {
|
||||
defer cancel()
|
||||
return nil, err
|
||||
}
|
||||
ch := make(chan error)
|
||||
obj.wg.Add(1) // hook in to global wait group
|
||||
go func() {
|
||||
defer obj.wg.Done()
|
||||
defer close(ch)
|
||||
defer cancel()
|
||||
var data *interfaces.WatcherData
|
||||
var ok bool
|
||||
for {
|
||||
select {
|
||||
case data, ok = <-info.Events: // read
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
case <-cancelCtx.Done():
|
||||
continue // wait for ch closure, but don't block
|
||||
}
|
||||
|
||||
select {
|
||||
case ch <- data.Err: // send (might be nil!)
|
||||
case <-cancelCtx.Done():
|
||||
continue // wait for ch closure, but don't block
|
||||
}
|
||||
}
|
||||
}()
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// ComplexWatcher is a more capable watcher that also returns data information.
|
||||
// This starts a watch request. It writes on a channel that you can follow to
|
||||
// know when an event or an error occurs. It always sends one startup event. It
|
||||
// will not return until the watch has been started. If it cannot start, then it
|
||||
// will return an error. Remember to add the WithPrefix() option if you want to
|
||||
// watch recursively.
|
||||
// TODO: do we need to support retry and changed client connections?
|
||||
// XXX: do we need to track last successful revision and retry from there?
|
||||
// XXX: if so, use:
|
||||
// lastRev := response.Header.Revision // TODO: +1 ?
|
||||
// etcd.WithRev(rev)
|
||||
func (obj *Simple) ComplexWatcher(ctx context.Context, path string, opts ...etcd.OpOption) (*interfaces.WatcherInfo, error) {
|
||||
if obj.client == nil { // catch bugs, this often means programming error
|
||||
return nil, fmt.Errorf("client is nil") // extra safety!
|
||||
}
|
||||
cancelCtx, cancel := context.WithCancel(ctx)
|
||||
eventsChan := make(chan *interfaces.WatcherData) // channel of runtime errors
|
||||
|
||||
var count uint8
|
||||
wg := &sync.WaitGroup{}
|
||||
|
||||
// TODO: if we can detect the use of WithCreatedNotify, we don't need to
|
||||
// hard-code it down below... https://github.com/coreos/etcd/issues/9689
|
||||
// XXX: proof of concept patch: https://github.com/coreos/etcd/pull/9705
|
||||
//for _, op := range opts {
|
||||
// //if op.Cmp(etcd.WithCreatedNotify()) == nil { // would be best
|
||||
// if etcd.OpOptionCmp(op, etcd.WithCreatedNotify()) == nil {
|
||||
// count++
|
||||
// wg.Add(1)
|
||||
// break
|
||||
// }
|
||||
//}
|
||||
count++
|
||||
wg.Add(1)
|
||||
|
||||
wOpts := []etcd.OpOption{
|
||||
etcd.WithCreatedNotify(),
|
||||
}
|
||||
wOpts = append(wOpts, opts...)
|
||||
var err error
|
||||
|
||||
obj.wg.Add(1) // hook in to global wait group
|
||||
go func() {
|
||||
defer obj.wg.Done()
|
||||
defer close(eventsChan)
|
||||
defer cancel() // it's safe to cancel() more than once!
|
||||
ch := obj.w.Watch(cancelCtx, path, wOpts...)
|
||||
for {
|
||||
var resp etcd.WatchResponse
|
||||
var ok bool
|
||||
var created bool
|
||||
select {
|
||||
case resp, ok = <-ch:
|
||||
if !ok {
|
||||
if count > 0 { // closed before startup
|
||||
// set err in parent scope!
|
||||
err = fmt.Errorf("watch closed")
|
||||
count--
|
||||
wg.Done()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// the watch is now running!
|
||||
if count > 0 && resp.Created {
|
||||
created = true
|
||||
count--
|
||||
wg.Done()
|
||||
}
|
||||
|
||||
isCanceled := resp.Canceled || resp.Err() == context.Canceled
|
||||
// TODO: this might not be needed
|
||||
if resp.Header.Revision == 0 { // by inspection
|
||||
if obj.Debug {
|
||||
obj.logf("watch: received empty message") // switched client connection
|
||||
}
|
||||
isCanceled = true
|
||||
}
|
||||
|
||||
if isCanceled {
|
||||
data := &interfaces.WatcherData{
|
||||
Err: context.Canceled,
|
||||
}
|
||||
select { // send the error
|
||||
case eventsChan <- data:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
continue // channel should close shortly
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: consider processing the response data into a
|
||||
// more useful form for the callback...
|
||||
data := &interfaces.WatcherData{
|
||||
Created: created,
|
||||
Path: path,
|
||||
Header: resp.Header,
|
||||
Events: resp.Events,
|
||||
Err: resp.Err(),
|
||||
}
|
||||
|
||||
select { // send the event
|
||||
case eventsChan <- data:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
wg.Wait() // wait for created event before we return
|
||||
|
||||
return &interfaces.WatcherInfo{
|
||||
Cancel: cancel,
|
||||
Events: eventsChan,
|
||||
}, err
|
||||
}
|
||||
@@ -15,20 +15,22 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
package str
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
|
||||
)
|
||||
|
||||
// ErrNotExist is returned when GetStr can not find the requested key.
|
||||
// TODO: https://dave.cheney.net/2016/04/07/constant-errors
|
||||
var ErrNotExist = errors.New("errNotExist")
|
||||
const (
|
||||
ns = "" // in case we want to add one back in
|
||||
)
|
||||
|
||||
// WatchStr returns a channel which spits out events on key activity.
|
||||
// FIXME: It should close the channel when it's done, and spit out errors when
|
||||
@@ -37,37 +39,23 @@ var ErrNotExist = errors.New("errNotExist")
|
||||
// done, does that mean we leak go-routines since it might still be running, but
|
||||
// perhaps even blocked??? Could this cause a dead-lock? Should we instead return
|
||||
// some sort of struct which has a close method with it to ask for a shutdown?
|
||||
func WatchStr(obj *EmbdEtcd, key string) chan error {
|
||||
func WatchStr(ctx context.Context, client interfaces.Client, key string) (chan error, error) {
|
||||
// new key structure is $NS/strings/$key = $data
|
||||
path := fmt.Sprintf("%s/strings/%s", NS, key)
|
||||
ch := make(chan error, 1)
|
||||
// FIXME: fix our API so that we get a close event on shutdown.
|
||||
callback := func(re *RE) error {
|
||||
// TODO: is this even needed? it used to happen on conn errors
|
||||
//log.Printf("Etcd: Watch: Path: %v", path) // event
|
||||
if re == nil || re.response.Canceled {
|
||||
return fmt.Errorf("watch is empty") // will cause a CtxError+retry
|
||||
}
|
||||
if len(ch) == 0 { // send event only if one isn't pending
|
||||
ch <- nil // event
|
||||
}
|
||||
return nil
|
||||
}
|
||||
_, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors
|
||||
return ch
|
||||
path := fmt.Sprintf("%s/strings/%s", ns, key)
|
||||
return client.Watcher(ctx, path)
|
||||
}
|
||||
|
||||
// GetStr collects the string which matches a global namespace in etcd.
|
||||
func GetStr(obj *EmbdEtcd, key string) (string, error) {
|
||||
func GetStr(ctx context.Context, client interfaces.Client, key string) (string, error) {
|
||||
// new key structure is $NS/strings/$key = $data
|
||||
path := fmt.Sprintf("%s/strings/%s", NS, key)
|
||||
keyMap, err := obj.Get(path, etcd.WithPrefix())
|
||||
path := fmt.Sprintf("%s/strings/%s", ns, key)
|
||||
keyMap, err := client.Get(ctx, path, etcd.WithPrefix())
|
||||
if err != nil {
|
||||
return "", errwrap.Wrapf(err, "could not get strings in: %s", key)
|
||||
}
|
||||
|
||||
if len(keyMap) == 0 {
|
||||
return "", ErrNotExist
|
||||
return "", interfaces.ErrNotExist
|
||||
}
|
||||
|
||||
if count := len(keyMap); count != 1 {
|
||||
@@ -79,23 +67,21 @@ func GetStr(obj *EmbdEtcd, key string) (string, error) {
|
||||
return "", fmt.Errorf("path `%s` is missing", path)
|
||||
}
|
||||
|
||||
//log.Printf("Etcd: GetStr(%s): %s", key, val)
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// SetStr sets a key and hostname pair to a certain value. If the value is
|
||||
// nil, then it deletes the key. Otherwise the value should point to a string.
|
||||
// TODO: TTL or delete disconnect?
|
||||
func SetStr(obj *EmbdEtcd, key string, data *string) error {
|
||||
func SetStr(ctx context.Context, client interfaces.Client, key string, data *string) error {
|
||||
// key structure is $NS/strings/$key = $data
|
||||
path := fmt.Sprintf("%s/strings/%s", NS, key)
|
||||
path := fmt.Sprintf("%s/strings/%s", ns, key)
|
||||
ifs := []etcd.Cmp{} // list matching the desired state
|
||||
ops := []etcd.Op{} // list of ops in this transaction (then)
|
||||
els := []etcd.Op{} // list of ops in this transaction (else)
|
||||
if data == nil { // perform a delete
|
||||
// TODO: use https://github.com/coreos/etcd/pull/7417 if merged
|
||||
//ifs = append(ifs, etcd.KeyExists(path))
|
||||
ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0))
|
||||
ifs = append(ifs, etcdutil.KeyExists(path))
|
||||
//ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0))
|
||||
ops = append(ops, etcd.OpDelete(path))
|
||||
} else {
|
||||
data := *data // get the real value
|
||||
@@ -105,6 +91,6 @@ func SetStr(obj *EmbdEtcd, key string, data *string) error {
|
||||
|
||||
// it's important to do this in one transaction, and atomically, because
|
||||
// this way, we only generate one watch event, and only when it's needed
|
||||
_, err := obj.Txn(ifs, ops, els) // TODO: do we need to look at response?
|
||||
_, err := client.Txn(ctx, ifs, ops, els) // TODO: do we need to look at response?
|
||||
return errwrap.Wrapf(err, "could not set strings in: %s", key)
|
||||
}
|
||||
@@ -15,50 +15,43 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
package strmap
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
|
||||
)
|
||||
|
||||
const (
|
||||
ns = "" // in case we want to add one back in
|
||||
)
|
||||
|
||||
// WatchStrMap returns a channel which spits out events on key activity.
|
||||
// FIXME: It should close the channel when it's done, and spit out errors when
|
||||
// something goes wrong.
|
||||
func WatchStrMap(obj *EmbdEtcd, key string) chan error {
|
||||
func WatchStrMap(ctx context.Context, client interfaces.Client, key string) (chan error, error) {
|
||||
// new key structure is $NS/strings/$key/$hostname = $data
|
||||
path := fmt.Sprintf("%s/strings/%s", NS, key)
|
||||
ch := make(chan error, 1)
|
||||
// FIXME: fix our API so that we get a close event on shutdown.
|
||||
callback := func(re *RE) error {
|
||||
// TODO: is this even needed? it used to happen on conn errors
|
||||
//log.Printf("Etcd: Watch: Path: %v", path) // event
|
||||
if re == nil || re.response.Canceled {
|
||||
return fmt.Errorf("watch is empty") // will cause a CtxError+retry
|
||||
}
|
||||
if len(ch) == 0 { // send event only if one isn't pending
|
||||
ch <- nil // event
|
||||
}
|
||||
return nil
|
||||
}
|
||||
_, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors
|
||||
return ch
|
||||
path := fmt.Sprintf("%s/strings/%s", ns, key)
|
||||
return client.Watcher(ctx, path, etcd.WithPrefix())
|
||||
}
|
||||
|
||||
// GetStrMap collects all of the strings which match a namespace in etcd.
|
||||
func GetStrMap(obj *EmbdEtcd, hostnameFilter []string, key string) (map[string]string, error) {
|
||||
func GetStrMap(ctx context.Context, client interfaces.Client, hostnameFilter []string, key string) (map[string]string, error) {
|
||||
// old key structure is $NS/strings/$hostname/$key = $data
|
||||
// new key structure is $NS/strings/$key/$hostname = $data
|
||||
// FIXME: if we have the $key as the last token (old key structure), we
|
||||
// can allow the key to contain the slash char, otherwise we need to
|
||||
// verify that one isn't present in the input string.
|
||||
path := fmt.Sprintf("%s/strings/%s", NS, key)
|
||||
keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
|
||||
path := fmt.Sprintf("%s/strings/%s", ns, key)
|
||||
keyMap, err := client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
|
||||
if err != nil {
|
||||
return nil, errwrap.Wrapf(err, "could not get strings in: %s", key)
|
||||
}
|
||||
@@ -91,16 +84,15 @@ func GetStrMap(obj *EmbdEtcd, hostnameFilter []string, key string) (map[string]s
|
||||
// SetStrMap sets a key and hostname pair to a certain value. If the value is
|
||||
// nil, then it deletes the key. Otherwise the value should point to a string.
|
||||
// TODO: TTL or delete disconnect?
|
||||
func SetStrMap(obj *EmbdEtcd, hostname, key string, data *string) error {
|
||||
func SetStrMap(ctx context.Context, client interfaces.Client, hostname, key string, data *string) error {
|
||||
// key structure is $NS/strings/$key/$hostname = $data
|
||||
path := fmt.Sprintf("%s/strings/%s/%s", NS, key, hostname)
|
||||
path := fmt.Sprintf("%s/strings/%s/%s", ns, key, hostname)
|
||||
ifs := []etcd.Cmp{} // list matching the desired state
|
||||
ops := []etcd.Op{} // list of ops in this transaction (then)
|
||||
els := []etcd.Op{} // list of ops in this transaction (else)
|
||||
if data == nil { // perform a delete
|
||||
// TODO: use https://github.com/coreos/etcd/pull/7417 if merged
|
||||
//ifs = append(ifs, etcd.KeyExists(path))
|
||||
ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0))
|
||||
ifs = append(ifs, etcdutil.KeyExists(path))
|
||||
//ifs = append(ifs, etcd.Compare(etcd.Version(path), ">", 0))
|
||||
ops = append(ops, etcd.OpDelete(path))
|
||||
} else {
|
||||
data := *data // get the real value
|
||||
@@ -110,6 +102,6 @@ func SetStrMap(obj *EmbdEtcd, hostname, key string, data *string) error {
|
||||
|
||||
// it's important to do this in one transaction, and atomically, because
|
||||
// this way, we only generate one watch event, and only when it's needed
|
||||
_, err := obj.Txn(ifs, ops, els) // TODO: do we need to look at response?
|
||||
_, err := client.Txn(ctx, ifs, ops, els) // TODO: do we need to look at response?
|
||||
return errwrap.Wrapf(err, "could not set strings in: %s", key)
|
||||
}
|
||||
49
etcd/converger.go
Normal file
49
etcd/converger.go
Normal file
@@ -0,0 +1,49 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
)
|
||||
|
||||
// setHostnameConverged sets whether a specific hostname is converged.
|
||||
func (obj *EmbdEtcd) setHostnameConverged(ctx context.Context, hostname string, isConverged bool) error {
|
||||
if obj.Debug {
|
||||
obj.Logf("setHostnameConverged(%s): %t", hostname, isConverged)
|
||||
defer obj.Logf("setHostnameConverged(%s): done!", hostname)
|
||||
}
|
||||
|
||||
key := fmt.Sprintf(obj.NS+convergedPathFmt, hostname)
|
||||
data := fmt.Sprintf("%t", isConverged)
|
||||
|
||||
// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
|
||||
// XXX: reverse things with els to workaround the bug :(
|
||||
//ifs := []etcd.Cmp{etcd.Compare(etcd.Value(key), "!=", data)} // desired state
|
||||
//ops := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(obj.leaseID))}
|
||||
ifs := []etcd.Cmp{etcd.Compare(etcd.Value(key), "=", data)} // desired state
|
||||
ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
|
||||
els := []etcd.Op{etcd.OpPut(key, data, etcd.WithLease(obj.leaseID))}
|
||||
|
||||
_, err := obj.client.Txn(ctx, ifs, nil, els)
|
||||
return errwrap.Wrapf(err, "set hostname converged failed")
|
||||
}
|
||||
@@ -15,16 +15,20 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
package deployer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -33,34 +37,52 @@ const (
|
||||
hashPath = "hash"
|
||||
)
|
||||
|
||||
// WatchDeploy returns a channel which spits out events on new deploy activity.
|
||||
// FIXME: It should close the channel when it's done, and spit out errors when
|
||||
// something goes wrong.
|
||||
func WatchDeploy(obj *EmbdEtcd) chan error {
|
||||
// key structure is $NS/deploy/$id/payload = $data
|
||||
path := fmt.Sprintf("%s/%s/", NS, deployPath)
|
||||
ch := make(chan error, 1)
|
||||
// FIXME: fix our API so that we get a close event on shutdown.
|
||||
callback := func(re *RE) error {
|
||||
// TODO: is this even needed? it used to happen on conn errors
|
||||
//log.Printf("Etcd: Watch: Path: %v", path) // event
|
||||
if re == nil || re.response.Canceled {
|
||||
return fmt.Errorf("watch is empty") // will cause a CtxError+retry
|
||||
}
|
||||
if len(ch) == 0 { // send event only if one isn't pending
|
||||
ch <- nil // event
|
||||
// SimpleDeploy is a deploy struct that provides all of the needed deploy
|
||||
// methods. It requires that you give it a Client interface so that it can
|
||||
// perform its remote work. You must call Init before you use it, and Close when
|
||||
// you are done.
|
||||
type SimpleDeploy struct {
|
||||
Client interfaces.Client
|
||||
|
||||
Debug bool
|
||||
Logf func(format string, v ...interface{})
|
||||
|
||||
ns string // TODO: if we ever need to hardcode a base path
|
||||
wg *sync.WaitGroup
|
||||
}
|
||||
|
||||
// Init validates the deploy structure and prepares it for first use.
|
||||
func (obj *SimpleDeploy) Init() error {
|
||||
if obj.Client == nil {
|
||||
return fmt.Errorf("the Client was not specified")
|
||||
}
|
||||
obj.wg = &sync.WaitGroup{}
|
||||
return nil
|
||||
}
|
||||
_, _ = obj.AddWatcher(path, callback, true, false, etcd.WithPrefix()) // no need to check errors
|
||||
return ch
|
||||
}
|
||||
|
||||
// Close cleans up after using the deploy struct and waits for any ongoing
|
||||
// watches to exit before it returns.
|
||||
func (obj *SimpleDeploy) Close() error {
|
||||
obj.wg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
// WatchDeploy returns a channel which spits out events on new deploy activity.
|
||||
// It closes the channel when it's done, and spits out errors when something
|
||||
// goes wrong. If it can't start up, it errors immediately. The returned channel
|
||||
// is buffered, so that a quick succession of events will get discarded.
|
||||
func (obj *SimpleDeploy) WatchDeploy(ctx context.Context) (chan error, error) {
|
||||
// key structure is $NS/deploy/$id/payload = $data
|
||||
path := fmt.Sprintf("%s/%s/", obj.ns, deployPath)
|
||||
// FIXME: obj.wg.Add(1) && obj.wg.Done()
|
||||
return obj.Client.Watcher(ctx, path, etcd.WithPrefix())
|
||||
}
|
||||
|
||||
// GetDeploys gets all the available deploys.
|
||||
func GetDeploys(obj Client) (map[uint64]string, error) {
|
||||
func (obj *SimpleDeploy) GetDeploys(ctx context.Context) (map[uint64]string, error) {
|
||||
// key structure is $NS/deploy/$id/payload = $data
|
||||
path := fmt.Sprintf("%s/%s/", NS, deployPath)
|
||||
keyMap, err := obj.Get(path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
|
||||
path := fmt.Sprintf("%s/%s/", obj.ns, deployPath)
|
||||
keyMap, err := obj.Client.Get(ctx, path, etcd.WithPrefix(), etcd.WithSort(etcd.SortByKey, etcd.SortAscend))
|
||||
if err != nil {
|
||||
return nil, errwrap.Wrapf(err, "could not get deploy")
|
||||
}
|
||||
@@ -86,7 +108,7 @@ func GetDeploys(obj Client) (map[uint64]string, error) {
|
||||
}
|
||||
|
||||
// TODO: do some sort of filtering here?
|
||||
//log.Printf("Etcd: GetDeploys(%s): Id => Data: %d => %s", key, id, val)
|
||||
//obj.Logf("GetDeploys(%s): Id => Data: %d => %s", key, id, val)
|
||||
result[id] = val
|
||||
}
|
||||
return result, nil
|
||||
@@ -107,8 +129,8 @@ func calculateMax(deploys map[uint64]string) uint64 {
|
||||
// an id of 0, you'll get back an empty deploy without error. This is useful so
|
||||
// that you can pass through this function easily.
|
||||
// FIXME: implement this more efficiently so that it doesn't have to download *all* the old deploys from etcd!
|
||||
func GetDeploy(obj Client, id uint64) (string, error) {
|
||||
result, err := GetDeploys(obj)
|
||||
func (obj *SimpleDeploy) GetDeploy(ctx context.Context, id uint64) (string, error) {
|
||||
result, err := obj.GetDeploys(ctx)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -130,9 +152,9 @@ func GetDeploy(obj Client, id uint64) (string, error) {
|
||||
// zero. You must increment the returned value by one when you add a deploy. If
|
||||
// two or more clients race for this deploy id, then the loser is not committed,
|
||||
// and must repeat this GetMaxDeployID process until it succeeds with a commit!
|
||||
func GetMaxDeployID(obj Client) (uint64, error) {
|
||||
func (obj *SimpleDeploy) GetMaxDeployID(ctx context.Context) (uint64, error) {
|
||||
// TODO: this was all implemented super inefficiently, fix up for perf!
|
||||
deploys, err := GetDeploys(obj) // get previous deploys
|
||||
deploys, err := obj.GetDeploys(ctx) // get previous deploys
|
||||
if err != nil {
|
||||
return 0, errwrap.Wrapf(err, "error getting previous deploys")
|
||||
}
|
||||
@@ -148,29 +170,28 @@ func GetMaxDeployID(obj Client) (uint64, error) {
|
||||
// contributors pushing conflicting deploys. This isn't git specific, and so any
|
||||
// arbitrary string hash can be used.
|
||||
// FIXME: prune old deploys from the store when they aren't needed anymore...
|
||||
func AddDeploy(obj Client, id uint64, hash, pHash string, data *string) error {
|
||||
func (obj *SimpleDeploy) AddDeploy(ctx context.Context, id uint64, hash, pHash string, data *string) error {
|
||||
// key structure is $NS/deploy/$id/payload = $data
|
||||
// key structure is $NS/deploy/$id/hash = $hash
|
||||
path := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id, payloadPath)
|
||||
tPath := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id, hashPath)
|
||||
path := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id, payloadPath)
|
||||
tPath := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id, hashPath)
|
||||
ifs := []etcd.Cmp{} // list matching the desired state
|
||||
ops := []etcd.Op{} // list of ops in this transaction (then)
|
||||
|
||||
// TODO: use https://github.com/coreos/etcd/pull/7417 if merged
|
||||
// we're append only, so ensure this unique deploy id doesn't exist
|
||||
ifs = append(ifs, etcd.Compare(etcd.Version(path), "=", 0)) // KeyMissing
|
||||
//ifs = append(ifs, etcd.KeyMissing(path))
|
||||
//ifs = append(ifs, etcd.Compare(etcd.Version(path), "=", 0)) // KeyMissing
|
||||
ifs = append(ifs, etcdutil.KeyMissing(path))
|
||||
|
||||
// don't look for previous deploy if this is the first deploy ever
|
||||
if id > 1 {
|
||||
// we append sequentially, so ensure previous key *does* exist
|
||||
prev := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id-1, payloadPath)
|
||||
ifs = append(ifs, etcd.Compare(etcd.Version(prev), ">", 0)) // KeyExists
|
||||
//ifs = append(ifs, etcd.KeyExists(prev))
|
||||
prev := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id-1, payloadPath)
|
||||
//ifs = append(ifs, etcd.Compare(etcd.Version(prev), ">", 0)) // KeyExists
|
||||
ifs = append(ifs, etcdutil.KeyExists(prev))
|
||||
|
||||
if hash != "" && pHash != "" {
|
||||
// does the previously stored hash match what we expect?
|
||||
prevHash := fmt.Sprintf("%s/%s/%d/%s", NS, deployPath, id-1, hashPath)
|
||||
prevHash := fmt.Sprintf("%s/%s/%d/%s", obj.ns, deployPath, id-1, hashPath)
|
||||
ifs = append(ifs, etcd.Compare(etcd.Value(prevHash), "=", pHash))
|
||||
}
|
||||
}
|
||||
@@ -182,7 +203,7 @@ func AddDeploy(obj Client, id uint64, hash, pHash string, data *string) error {
|
||||
|
||||
// it's important to do this in one transaction, and atomically, because
|
||||
// this way, we only generate one watch event, and only when it's needed
|
||||
result, err := obj.Txn(ifs, ops, nil)
|
||||
result, err := obj.Client.Txn(ctx, ifs, ops, nil)
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "error creating deploy id %d", id)
|
||||
}
|
||||
@@ -18,13 +18,10 @@
|
||||
package etcd
|
||||
|
||||
import (
|
||||
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
)
|
||||
|
||||
// Client provides a simple interface specification for client requests. Both
|
||||
// EmbdEtcd and ClientEtcd implement this.
|
||||
type Client interface {
|
||||
// TODO: add more method signatures
|
||||
Get(path string, opts ...etcd.OpOption) (map[string]string, error)
|
||||
Txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error)
|
||||
}
|
||||
const (
|
||||
// errInconsistentApply means applyDeltaEvents wasn't consistent.
|
||||
errInconsistentApply = interfaces.Error("inconsistent apply")
|
||||
)
|
||||
3018
etcd/etcd.go
3018
etcd/etcd.go
File diff suppressed because it is too large
Load Diff
@@ -21,31 +21,19 @@ package etcd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types"
|
||||
)
|
||||
|
||||
func TestNewEmbdEtcd(t *testing.T) {
|
||||
// should return a new etcd object
|
||||
|
||||
noServer := false
|
||||
var flags Flags
|
||||
|
||||
obj := NewEmbdEtcd("", nil, nil, nil, nil, nil, noServer, false, 0, flags, "", nil)
|
||||
if obj == nil {
|
||||
t.Fatal("failed to create server object")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewEmbdEtcdConfigValidation(t *testing.T) {
|
||||
// running --no-server with no --seeds specified should fail early
|
||||
|
||||
seeds := make(etcdtypes.URLs, 0)
|
||||
noServer := true
|
||||
var flags Flags
|
||||
|
||||
obj := NewEmbdEtcd("", seeds, nil, nil, nil, nil, noServer, false, 0, flags, "", nil)
|
||||
if obj != nil {
|
||||
t.Fatal("server initialization should fail on invalid configuration")
|
||||
func TestValidation1(t *testing.T) {
|
||||
// running --no-server with no --seeds should not validate at the moment
|
||||
embdEtcd := &EmbdEtcd{
|
||||
//Seeds: etcdtypes.URLs{},
|
||||
NoServer: true,
|
||||
}
|
||||
if err := embdEtcd.Validate(); err == nil {
|
||||
t.Errorf("expected validation err, got nil")
|
||||
}
|
||||
if err := embdEtcd.Init(); err == nil {
|
||||
t.Errorf("expected init err, got nil")
|
||||
defer embdEtcd.Close()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// Package event provides some primitives that are used for message passing.
|
||||
package event
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Resp is a response channel used for boolean-style replies. Sending nil on it
// means ACK, and sending any non-nil error means NACK, which also allows a
// custom error to be carried as the reason.
type Resp chan error

// NewResp returns a new, unbuffered response channel of the right type.
func NewResp() Resp {
	return make(Resp)
}

// ACK sends a nil (positive) value on resp. It does nothing if resp is nil.
func (resp Resp) ACK() {
	if resp == nil {
		return
	}
	resp <- nil // TODO: close instead?
}

// NACK sends a generic "NACK" error on resp. It does nothing if resp is nil.
func (resp Resp) NACK() {
	if resp == nil {
		return
	}
	resp <- fmt.Errorf("NACK")
}

// ACKNACK sends the given value on resp: nil counts as an ACK, and any non-nil
// error counts as a NACK. It does nothing if resp is nil.
func (resp Resp) ACKNACK(err error) {
	if resp == nil {
		return
	}
	resp <- err
}

// Wait blocks until a value arrives on resp, and then returns that value.
func (resp Resp) Wait() error {
	err := <-resp
	return err
}

// ACKWait blocks until an ACK (a nil value) is received on resp. Any NACK
// values that arrive in the meantime are read and discarded.
func (resp Resp) ACKWait() {
	for resp.Wait() != nil {
		// that was a NACK, keep waiting for the ACK
	}
}
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
@@ -32,6 +31,7 @@ import (
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
|
||||
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
|
||||
)
|
||||
|
||||
func init() {
|
||||
@@ -263,10 +263,8 @@ func (obj *File) Sync() error {
|
||||
|
||||
p := obj.path() // store file data at this path in etcd
|
||||
|
||||
// TODO: use https://github.com/coreos/etcd/pull/7417 if merged
|
||||
cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing
|
||||
//cmp := etcd.KeyMissing(p))
|
||||
|
||||
//cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing
|
||||
cmp := etcdutil.KeyMissing(p)
|
||||
op := etcd.OpPut(p, string(obj.data)) // this pushes contents to server
|
||||
|
||||
// it's important to do this in one transaction, and atomically, because
|
||||
@@ -277,7 +275,7 @@ func (obj *File) Sync() error {
|
||||
}
|
||||
if !result.Succeeded {
|
||||
if obj.fs.Debug {
|
||||
log.Printf("debug: data already exists in storage")
|
||||
obj.fs.Logf("debug: data already exists in storage")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/gob"
|
||||
"encoding/hex"
|
||||
@@ -27,19 +28,18 @@ import (
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
|
||||
rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||
"github.com/spf13/afero"
|
||||
context "golang.org/x/net/context"
|
||||
)
|
||||
|
||||
func init() {
|
||||
@@ -91,7 +91,7 @@ var (
|
||||
// XXX: this is harder because we need to list of *all* metadata paths, if we
|
||||
// want them to be able to share storage backends. (we do)
|
||||
type Fs struct {
|
||||
Client *etcd.Client
|
||||
Client interfaces.Client
|
||||
|
||||
Metadata string // location of "superblock" for this filesystem
|
||||
|
||||
@@ -99,6 +99,7 @@ type Fs struct {
|
||||
Hash string // eg: sha256
|
||||
|
||||
Debug bool
|
||||
Logf func(format string, v ...interface{})
|
||||
|
||||
sb *superBlock
|
||||
mounted bool
|
||||
@@ -115,7 +116,7 @@ type superBlock struct {
|
||||
|
||||
// NewEtcdFs creates a new filesystem handle on an etcd client connection. You
|
||||
// must specify the metadata string that you wish to use.
|
||||
func NewEtcdFs(client *etcd.Client, metadata string) afero.Fs {
|
||||
func NewEtcdFs(client interfaces.Client, metadata string) afero.Fs {
|
||||
return &Fs{
|
||||
Client: client,
|
||||
Metadata: metadata,
|
||||
@@ -127,23 +128,26 @@ func (obj *Fs) get(path string, opts ...etcd.OpOption) (map[string][]byte, error
|
||||
ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout)
|
||||
resp, err := obj.Client.Get(ctx, path, opts...)
|
||||
cancel()
|
||||
if err != nil || resp == nil {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if resp == nil {
|
||||
return nil, fmt.Errorf("empty response")
|
||||
}
|
||||
|
||||
// TODO: write a resp.ToMap() function on https://godoc.org/github.com/coreos/etcd/etcdserver/etcdserverpb#RangeResponse
|
||||
result := make(map[string][]byte) // formerly: map[string][]byte
|
||||
for _, x := range resp.Kvs {
|
||||
result[string(x.Key)] = x.Value // formerly: bytes.NewBuffer(x.Value).String()
|
||||
// FIXME: just return resp instead if it was map[string]string?
|
||||
result := make(map[string][]byte)
|
||||
for key, val := range resp {
|
||||
result[key] = []byte(val) // wasteful transform
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// put a value into etcd.
|
||||
func (obj *Fs) put(path string, data []byte, opts ...etcd.OpOption) error {
|
||||
func (obj *Fs) set(path string, data []byte, opts ...etcd.OpOption) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout)
|
||||
_, err := obj.Client.Put(ctx, path, string(data), opts...) // TODO: obj.Client.KV ?
|
||||
err := obj.Client.Set(ctx, path, string(data), opts...)
|
||||
cancel()
|
||||
if err != nil {
|
||||
switch err {
|
||||
@@ -163,7 +167,7 @@ func (obj *Fs) put(path string, data []byte, opts ...etcd.OpOption) error {
|
||||
// txn runs a txn in etcd.
|
||||
func (obj *Fs) txn(ifcmps []etcd.Cmp, thenops, elseops []etcd.Op) (*etcd.TxnResponse, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), EtcdTimeout)
|
||||
resp, err := obj.Client.Txn(ctx).If(ifcmps...).Then(thenops...).Else(elseops...).Commit()
|
||||
resp, err := obj.Client.Txn(ctx, ifcmps, thenops, elseops)
|
||||
cancel()
|
||||
return resp, err
|
||||
}
|
||||
@@ -194,7 +198,7 @@ func (obj *Fs) sync() error {
|
||||
return errwrap.Wrapf(err, "gob failed to encode")
|
||||
}
|
||||
//base64.StdEncoding.EncodeToString(b.Bytes())
|
||||
return obj.put(obj.Metadata, b.Bytes())
|
||||
return obj.set(obj.Metadata, b.Bytes())
|
||||
}
|
||||
|
||||
// mount downloads the initial cache of metadata, including the *file tree.
|
||||
@@ -213,7 +217,7 @@ func (obj *Fs) mount() error {
|
||||
}
|
||||
if result == nil || len(result) == 0 { // nothing found, create the fs
|
||||
if obj.Debug {
|
||||
log.Printf("debug: mount: creating new fs at: %s", obj.Metadata)
|
||||
obj.Logf("mount: creating new fs at: %s", obj.Metadata)
|
||||
}
|
||||
// trim any trailing slashes from DataPrefix
|
||||
for strings.HasSuffix(obj.DataPrefix, "/") {
|
||||
@@ -248,7 +252,7 @@ func (obj *Fs) mount() error {
|
||||
}
|
||||
|
||||
if obj.Debug {
|
||||
log.Printf("debug: mount: opening old fs at: %s", obj.Metadata)
|
||||
obj.Logf("mount: opening old fs at: %s", obj.Metadata)
|
||||
}
|
||||
sb, exists := result[obj.Metadata]
|
||||
if !exists {
|
||||
|
||||
@@ -26,7 +26,7 @@ import (
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd"
|
||||
"github.com/purpleidea/mgmt/etcd/client"
|
||||
etcdfs "github.com/purpleidea/mgmt/etcd/fs"
|
||||
"github.com/purpleidea/mgmt/integration"
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
@@ -41,6 +41,7 @@ import (
|
||||
const (
|
||||
umask = 0666
|
||||
superblock = "/some/superblock" // TODO: generate randomly per test?
|
||||
ns = "/_mgmt/test" // must not end with a slash!
|
||||
)
|
||||
|
||||
// Ensure that etcdfs.Fs implements afero.Fs.
|
||||
@@ -79,20 +80,26 @@ func TestFs1(t *testing.T) {
|
||||
}
|
||||
defer stopEtcd() // ignore the error
|
||||
|
||||
etcdClient := &etcd.ClientEtcd{
|
||||
Seeds: []string{"localhost:2379"}, // endpoints
|
||||
logf := func(format string, v ...interface{}) {
|
||||
t.Logf("test: etcd: fs: "+format, v...)
|
||||
}
|
||||
etcdClient := client.NewClientFromSeedsNamespace(
|
||||
[]string{"localhost:2379"}, // endpoints
|
||||
ns,
|
||||
)
|
||||
|
||||
if err := etcdClient.Connect(); err != nil {
|
||||
if err := etcdClient.Init(); err != nil {
|
||||
t.Errorf("client connection error: %+v", err)
|
||||
return
|
||||
}
|
||||
defer etcdClient.Destroy()
|
||||
defer etcdClient.Close()
|
||||
|
||||
etcdFs := &etcdfs.Fs{
|
||||
Client: etcdClient.GetClient(),
|
||||
Client: etcdClient,
|
||||
Metadata: superblock,
|
||||
DataPrefix: etcdfs.DefaultDataPrefix,
|
||||
|
||||
Logf: logf,
|
||||
}
|
||||
//var etcdFs afero.Fs = NewEtcdFs()
|
||||
|
||||
@@ -193,20 +200,26 @@ func TestFs2(t *testing.T) {
|
||||
}
|
||||
defer stopEtcd() // ignore the error
|
||||
|
||||
etcdClient := &etcd.ClientEtcd{
|
||||
Seeds: []string{"localhost:2379"}, // endpoints
|
||||
logf := func(format string, v ...interface{}) {
|
||||
t.Logf("test: etcd: fs: "+format, v...)
|
||||
}
|
||||
etcdClient := client.NewClientFromSeedsNamespace(
|
||||
[]string{"localhost:2379"}, // endpoints
|
||||
ns,
|
||||
)
|
||||
|
||||
if err := etcdClient.Connect(); err != nil {
|
||||
if err := etcdClient.Init(); err != nil {
|
||||
t.Errorf("client connection error: %+v", err)
|
||||
return
|
||||
}
|
||||
defer etcdClient.Destroy()
|
||||
defer etcdClient.Close()
|
||||
|
||||
etcdFs := &etcdfs.Fs{
|
||||
Client: etcdClient.GetClient(),
|
||||
Client: etcdClient,
|
||||
Metadata: superblock,
|
||||
DataPrefix: etcdfs.DefaultDataPrefix,
|
||||
|
||||
Logf: logf,
|
||||
}
|
||||
|
||||
tree, err := util.FsTree(etcdFs, "/")
|
||||
@@ -246,20 +259,26 @@ func TestFs3(t *testing.T) {
|
||||
}
|
||||
defer stopEtcd() // ignore the error
|
||||
|
||||
etcdClient := &etcd.ClientEtcd{
|
||||
Seeds: []string{"localhost:2379"}, // endpoints
|
||||
logf := func(format string, v ...interface{}) {
|
||||
t.Logf("test: etcd: fs: "+format, v...)
|
||||
}
|
||||
etcdClient := client.NewClientFromSeedsNamespace(
|
||||
[]string{"localhost:2379"}, // endpoints
|
||||
ns,
|
||||
)
|
||||
|
||||
if err := etcdClient.Connect(); err != nil {
|
||||
if err := etcdClient.Init(); err != nil {
|
||||
t.Errorf("client connection error: %+v", err)
|
||||
return
|
||||
}
|
||||
defer etcdClient.Destroy()
|
||||
defer etcdClient.Close()
|
||||
|
||||
etcdFs := &etcdfs.Fs{
|
||||
Client: etcdClient.GetClient(),
|
||||
Client: etcdClient,
|
||||
Metadata: superblock,
|
||||
DataPrefix: etcdfs.DefaultDataPrefix,
|
||||
|
||||
Logf: logf,
|
||||
}
|
||||
|
||||
if err := etcdFs.Mkdir("/tmp", umask); err != nil {
|
||||
@@ -371,18 +390,19 @@ func TestEtcdCopyFs0(t *testing.T) {
|
||||
}
|
||||
defer stopEtcd() // ignore the error
|
||||
|
||||
etcdClient := &etcd.ClientEtcd{
|
||||
Seeds: []string{"localhost:2379"}, // endpoints
|
||||
}
|
||||
etcdClient := client.NewClientFromSeedsNamespace(
|
||||
[]string{"localhost:2379"}, // endpoints
|
||||
ns,
|
||||
)
|
||||
|
||||
if err := etcdClient.Connect(); err != nil {
|
||||
if err := etcdClient.Init(); err != nil {
|
||||
t.Errorf("client connection error: %+v", err)
|
||||
return
|
||||
}
|
||||
defer etcdClient.Destroy()
|
||||
defer etcdClient.Close()
|
||||
|
||||
etcdFs := &etcdfs.Fs{
|
||||
Client: etcdClient.GetClient(),
|
||||
Client: etcdClient,
|
||||
Metadata: superblock,
|
||||
DataPrefix: etcdfs.DefaultDataPrefix,
|
||||
}
|
||||
|
||||
160
etcd/helpers.go
Normal file
160
etcd/helpers.go
Normal file
@@ -0,0 +1,160 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
"github.com/coreos/etcd/mvcc/mvccpb"
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types" // generated package
|
||||
)
|
||||
|
||||
// setEndpoints sets the endpoints on the etcd client if it exists. It
|
||||
// prioritizes local endpoints for performance, and so that if a remote endpoint
|
||||
// disconnects we aren't affected.
|
||||
func (obj *EmbdEtcd) setEndpoints() {
|
||||
if obj.etcd == nil { // if client doesn't exist, skip!
|
||||
return
|
||||
}
|
||||
|
||||
eps := fromURLsMapToStringList(obj.endpoints) // get flat list
|
||||
sort.Strings(eps) // sort for determinism
|
||||
|
||||
curls, _ := obj.curls() // ignore error, was already validated
|
||||
|
||||
// prio sort so we connect locally first
|
||||
urls := fromURLsToStringList(curls)
|
||||
headFn := func(x string) bool {
|
||||
return !util.StrInList(x, urls)
|
||||
}
|
||||
eps = util.PriorityStrSliceSort(eps, headFn)
|
||||
if obj.Debug {
|
||||
obj.Logf("set endpoints to: %+v", eps)
|
||||
}
|
||||
// trigger reconnect with new endpoint list
|
||||
// XXX: When a client switches endpoints, do the watches continue from
|
||||
// where they last were or do they restart? Add rev restart if needed.
|
||||
obj.etcd.SetEndpoints(eps...) // no error to check
|
||||
}
|
||||
|
||||
// ConnectBlock runs a command as soon as the client is connected. When this
|
||||
// happens, it closes the output channel. In case any error occurs, it sends it
|
||||
// on that channel.
|
||||
func (obj *EmbdEtcd) ConnectBlock(ctx context.Context, fn func(context.Context) error) <-chan error {
|
||||
ch := make(chan error)
|
||||
obj.wg.Add(1)
|
||||
go func() {
|
||||
defer obj.wg.Done()
|
||||
defer close(ch)
|
||||
select {
|
||||
case <-obj.connectSignal: // the client is connected!
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
if fn == nil {
|
||||
return
|
||||
}
|
||||
if err := fn(ctx); err != nil {
|
||||
select {
|
||||
case ch <- err:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
}
|
||||
}()
|
||||
return ch
|
||||
}
|
||||
|
||||
// bootstrapWatcherData returns some a minimal WatcherData struct to simulate an
|
||||
// initial event for bootstrapping the nominateCb before we've started up.
|
||||
func bootstrapWatcherData(hostname string, urls etcdtypes.URLs) *interfaces.WatcherData {
|
||||
return &interfaces.WatcherData{
|
||||
Created: true, // add this flag to hint that we're bootstrapping
|
||||
|
||||
Header: pb.ResponseHeader{}, // not needed
|
||||
Events: []*etcd.Event{
|
||||
{
|
||||
Type: mvccpb.PUT, // or mvccpb.DELETE
|
||||
Kv: &mvccpb.KeyValue{
|
||||
Key: []byte(hostname),
|
||||
Value: []byte(urls.String()),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// applyDeltaEvents applies the WatchResponse deltas to a URLsMap and returns a
|
||||
// modified copy.
|
||||
func applyDeltaEvents(data *interfaces.WatcherData, urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
|
||||
if err := data.Err; err != nil {
|
||||
return nil, errwrap.Wrapf(err, "data contains an error")
|
||||
}
|
||||
out, err := copyURLsMap(urlsMap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if data == nil { // passthrough
|
||||
return out, nil
|
||||
}
|
||||
var reterr error
|
||||
for _, event := range data.Events {
|
||||
key := string(event.Kv.Key)
|
||||
key = key[len(data.Path):] // remove path prefix
|
||||
//obj.Logf("applyDeltaEvents: Event(%s): %s", event.Type.String(), key)
|
||||
|
||||
switch event.Type {
|
||||
case etcd.EventTypePut:
|
||||
val := string(event.Kv.Value)
|
||||
if val == "" {
|
||||
return nil, fmt.Errorf("value is empty")
|
||||
}
|
||||
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
|
||||
if err != nil {
|
||||
return nil, errwrap.Wrapf(err, "format error")
|
||||
}
|
||||
urlsMap[key] = urls // add to map
|
||||
|
||||
// expiry cases are seen as delete in v3 for now
|
||||
//case etcd.EventTypeExpire: // doesn't exist right now
|
||||
// fallthrough
|
||||
case etcd.EventTypeDelete:
|
||||
if _, exists := urlsMap[key]; exists {
|
||||
delete(urlsMap, key)
|
||||
continue
|
||||
}
|
||||
|
||||
// this can happen if we retry an operation between a
|
||||
// reconnect, so ignore in case we are reconnecting...
|
||||
reterr = errInconsistentApply // key not found
|
||||
// keep applying in case this is ignored
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown event: %v", event.Type)
|
||||
}
|
||||
}
|
||||
return urlsMap, reterr
|
||||
}
|
||||
63
etcd/interfaces/client.go
Normal file
63
etcd/interfaces/client.go
Normal file
@@ -0,0 +1,63 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package interfaces
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3" // "clientv3"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
)
|
||||
|
||||
// WatcherData is the structure of data passed to a callback from any watcher.
|
||||
type WatcherData struct {
|
||||
// Created is true if this event is the initial event sent on startup.
|
||||
Created bool
|
||||
|
||||
// XXX: what goes here... this? or a more processed version?
|
||||
Path string // the path we're watching
|
||||
Header pb.ResponseHeader
|
||||
Events []*etcd.Event
|
||||
Err error
|
||||
}
|
||||
|
||||
// WatcherInfo is what is returned from a Watcher. It contains everything you
|
||||
// might need to get information about the running watch.
|
||||
type WatcherInfo struct {
|
||||
// Cancel must be called to shutdown the Watcher when we are done with
|
||||
// it. You can alternatively call cancel on the input ctx.
|
||||
Cancel func()
|
||||
|
||||
// Events returns a channel of any events that occur. This happens on
|
||||
// watch startup, watch event, and watch failure. This channel closes
|
||||
// when the Watcher shuts down. If you block on these reads, then you
|
||||
// will block the entire Watcher which is usually not what you want.
|
||||
Events <-chan *WatcherData
|
||||
}
|
||||
|
||||
// Client provides a simple interface specification for client requests. Both
|
||||
// EmbdEtcd.MakeClient and client.Simple implement this.
|
||||
type Client interface {
|
||||
GetClient() *etcd.Client
|
||||
Set(ctx context.Context, key, value string, opts ...etcd.OpOption) error
|
||||
Get(ctx context.Context, path string, opts ...etcd.OpOption) (map[string]string, error)
|
||||
Del(ctx context.Context, path string, opts ...etcd.OpOption) (int64, error)
|
||||
Txn(ctx context.Context, ifCmps []etcd.Cmp, thenOps, elseOps []etcd.Op) (*etcd.TxnResponse, error)
|
||||
Watcher(ctx context.Context, path string, opts ...etcd.OpOption) (chan error, error)
|
||||
ComplexWatcher(ctx context.Context, path string, opts ...etcd.OpOption) (*WatcherInfo, error)
|
||||
}
|
||||
33
etcd/interfaces/error.go
Normal file
33
etcd/interfaces/error.go
Normal file
@@ -0,0 +1,33 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package interfaces
|
||||
|
||||
// Error is a string-backed error type. Because it is a plain string underneath,
// values of it can be declared as constants.
type Error string

// Error returns the underlying string, which satisfies the error interface.
func (e Error) Error() string {
	return string(e)
}

const (
	// ErrNotExist is returned when GetStr or friends can not find the
	// requested key.
	ErrNotExist Error = "ErrNotExist"

	// ErrShutdown is returned when we're exiting during a shutdown.
	ErrShutdown Error = "ErrShutdown"
)
|
||||
314
etcd/membership.go
Normal file
314
etcd/membership.go
Normal file
@@ -0,0 +1,314 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types"
|
||||
)
|
||||
|
||||
// addSelfState is used to populate the initial state when I am adding myself.
|
||||
func (obj *EmbdEtcd) addSelfState() {
|
||||
surls, _ := obj.surls() // validated on init
|
||||
curls, _ := obj.curls() // validated on init
|
||||
obj.membermap[obj.Hostname] = surls
|
||||
obj.endpoints[obj.Hostname] = curls
|
||||
obj.memberIDs[obj.Hostname] = obj.serverID
|
||||
}
|
||||
|
||||
// addMemberState adds the specific member state to our local caches.
|
||||
func (obj *EmbdEtcd) addMemberState(member string, id uint64, surls, curls etcdtypes.URLs) {
|
||||
obj.stateMutex.Lock()
|
||||
defer obj.stateMutex.Unlock()
|
||||
if surls != nil {
|
||||
obj.membermap[member] = surls
|
||||
}
|
||||
if curls != nil { // TODO: && len(curls) > 0 ?
|
||||
obj.endpoints[member] = curls
|
||||
}
|
||||
obj.memberIDs[member] = id
|
||||
}
|
||||
|
||||
// rmMemberState removes the state of a given member.
|
||||
func (obj *EmbdEtcd) rmMemberState(member string) {
|
||||
obj.stateMutex.Lock()
|
||||
defer obj.stateMutex.Unlock()
|
||||
delete(obj.membermap, member) // proactively delete it
|
||||
delete(obj.endpoints, member) // proactively delete it
|
||||
delete(obj.memberIDs, member) // proactively delete it
|
||||
}
|
||||
|
||||
// updateMemberState updates some of our local state whenever we get new
|
||||
// information from a response.
|
||||
// TODO: ideally this would be []*etcd.Member but the types are inconsistent...
|
||||
// TODO: is it worth computing a delta to see if we need to change this?
|
||||
func (obj *EmbdEtcd) updateMemberState(members []*pb.Member) error {
|
||||
//nominated := make(etcdtypes.URLsMap)
|
||||
//volunteers := make(etcdtypes.URLsMap)
|
||||
membermap := make(etcdtypes.URLsMap) // map[hostname]URLs
|
||||
endpoints := make(etcdtypes.URLsMap) // map[hostname]URLs
|
||||
memberIDs := make(map[string]uint64) // map[hostname]memberID
|
||||
|
||||
// URLs is etcdtypes.URLs is []url.URL
|
||||
for _, member := range members {
|
||||
// member.ID // uint64
|
||||
// member.Name // string (hostname)
|
||||
// member.PeerURLs // []string (URLs)
|
||||
// member.ClientURLs // []string (URLs)
|
||||
|
||||
if member.Name == "" { // not started yet
|
||||
continue
|
||||
}
|
||||
|
||||
// []string -> etcdtypes.URLs
|
||||
purls, err := etcdtypes.NewURLs(member.PeerURLs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
curls, err := etcdtypes.NewURLs(member.ClientURLs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
//nominated[member.Name] = member.PeerURLs
|
||||
//volunteers[member.Name] = member.PeerURLs
|
||||
membermap[member.Name] = purls
|
||||
endpoints[member.Name] = curls
|
||||
memberIDs[member.Name] = member.ID
|
||||
}
|
||||
|
||||
// set
|
||||
obj.stateMutex.Lock()
|
||||
defer obj.stateMutex.Unlock()
|
||||
// can't set these two, because we only have a partial knowledge of them
|
||||
//obj.nominated = nominated // can't get this information (partial)
|
||||
//obj.volunteers = volunteers // can't get this information (partial)
|
||||
obj.membermap = membermap
|
||||
obj.endpoints = endpoints
|
||||
obj.memberIDs = memberIDs
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// memberList returns the current list of server peer members in the cluster.
|
||||
func (obj *EmbdEtcd) memberList(ctx context.Context) (*etcd.MemberListResponse, error) {
|
||||
return obj.etcd.MemberList(ctx)
|
||||
}
|
||||
|
||||
// memberAdd adds a member to the cluster.
|
||||
func (obj *EmbdEtcd) memberAdd(ctx context.Context, peerURLs etcdtypes.URLs) (*etcd.MemberAddResponse, error) {
|
||||
resp, err := obj.etcd.MemberAdd(ctx, peerURLs.StringSlice())
|
||||
if err == rpctypes.ErrPeerURLExist { // commonly seen at startup
|
||||
return nil, nil
|
||||
}
|
||||
if err == rpctypes.ErrMemberExist { // not seen yet, but plan for it
|
||||
return nil, nil
|
||||
}
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// memberRemove removes a member by ID and returns if it worked, and also if
|
||||
// there was an error. This is because it might have run without error, but the
|
||||
// member wasn't found, for example. If a value of zero is used, then it will
|
||||
// try to remove itself in an idempotent way based on whether we're supposed to
|
||||
// be running a server or not.
|
||||
func (obj *EmbdEtcd) memberRemove(ctx context.Context, memberID uint64) (*etcd.MemberRemoveResponse, error) {
|
||||
if memberID == 0 {
|
||||
// copy value to avoid it changing part way through
|
||||
memberID = obj.serverID
|
||||
}
|
||||
if memberID == 0 {
|
||||
return nil, fmt.Errorf("can't remove memberID of zero")
|
||||
}
|
||||
|
||||
resp, err := obj.etcd.MemberRemove(ctx, memberID)
|
||||
if err == rpctypes.ErrMemberNotFound {
|
||||
// if we get this, member already shut itself down :)
|
||||
return nil, nil // unchanged, mask this error
|
||||
}
|
||||
|
||||
return resp, err // changed
|
||||
}
|
||||
|
||||
// memberChange polls the member list API and runs a function on each iteration.
|
||||
// If that function returns nil, then it closes the output channel to signal an
|
||||
// event. Between iterations, it sleeps for a given interval. Since this polls
|
||||
// and doesn't watch events, it could miss changes if they happen rapidly. It
|
||||
// does not send results on the channel, since results could be captured in the
|
||||
// fn callback. It will send an error on the channel if something goes wrong.
|
||||
// TODO: https://github.com/coreos/etcd/issues/5277
|
||||
func (obj *EmbdEtcd) memberChange(ctx context.Context, fn func([]*pb.Member) error, d time.Duration) (chan error, error) {
|
||||
ch := make(chan error)
|
||||
go func() {
|
||||
defer close(ch)
|
||||
for {
|
||||
resp, err := obj.etcd.MemberList(ctx)
|
||||
if err != nil {
|
||||
select {
|
||||
case ch <- err: // send error
|
||||
case <-ctx.Done():
|
||||
}
|
||||
return
|
||||
}
|
||||
result := fn(resp.Members)
|
||||
if result == nil { // done!
|
||||
return
|
||||
}
|
||||
select {
|
||||
case <-time.After(d): // sleep before retry
|
||||
// pass
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// memberStateFromList does a member list, and applies the state to our cache.
|
||||
func (obj *EmbdEtcd) memberStateFromList(ctx context.Context) error {
|
||||
resp, err := obj.memberList(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if resp == nil {
|
||||
return fmt.Errorf("empty response")
|
||||
}
|
||||
reterr := obj.updateMemberState(resp.Members)
|
||||
if reterr == nil {
|
||||
obj.setEndpoints() // sync client with new endpoints
|
||||
}
|
||||
return reterr
|
||||
}
|
||||
|
||||
// isLeader returns true if I'm the leader from the first sane perspective (pov)
|
||||
// that I can arbitrarily pick.
|
||||
func (obj *EmbdEtcd) isLeader(ctx context.Context) (bool, error) {
|
||||
if obj.server == nil {
|
||||
return false, nil // if i'm not a server, i'm not a leader, return
|
||||
}
|
||||
|
||||
var ep, backup *url.URL
|
||||
if len(obj.ClientURLs) > 0 {
|
||||
// heuristic, but probably correct
|
||||
addresses := localhostURLs(obj.ClientURLs)
|
||||
if len(addresses) > 0 {
|
||||
ep = &addresses[0] // arbitrarily pick the first one
|
||||
}
|
||||
backup = &obj.ClientURLs[0] // backup
|
||||
}
|
||||
if ep == nil && len(obj.AClientURLs) > 0 {
|
||||
addresses := localhostURLs(obj.AClientURLs)
|
||||
if len(addresses) > 0 {
|
||||
ep = &addresses[0]
|
||||
}
|
||||
backup = &obj.AClientURLs[0] // backup
|
||||
}
|
||||
if ep == nil {
|
||||
ep = backup
|
||||
}
|
||||
if ep == nil { // programming error?
|
||||
return false, fmt.Errorf("no available endpoints")
|
||||
}
|
||||
|
||||
// Ask for one perspective...
|
||||
// TODO: are we supposed to use ep.Host instead?
|
||||
resp, err := obj.etcd.Maintenance.Status(ctx, ep.String()) // this perspective
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if resp == nil {
|
||||
return false, fmt.Errorf("empty response")
|
||||
}
|
||||
if resp.Leader != obj.serverID { // i am not the leader
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// moveLeaderSomewhere tries to transfer the leader to the alphanumerically
|
||||
// lowest member if the caller is the current leader. This contains races. If it
|
||||
// succeeds, it returns the member hostname that it transferred to. If it can't
|
||||
// transfer, but doesn't error, it returns an empty string. Any error condition
|
||||
// returns an error.
|
||||
func (obj *EmbdEtcd) moveLeaderSomewhere(ctx context.Context) (string, error) {
|
||||
//if isLeader, err := obj.isLeader(ctx); err != nil { // race!
|
||||
// return "", errwrap.Wrapf(err, "error determining leader")
|
||||
//} else if !isLeader {
|
||||
// if obj.Debug {
|
||||
// obj.Logf("we are not the leader...")
|
||||
// }
|
||||
// return "", nil
|
||||
//}
|
||||
// assume i am the leader!
|
||||
|
||||
memberList, err := obj.memberList(ctx)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var transfereeID uint64
|
||||
m := make(map[string]uint64)
|
||||
names := []string{}
|
||||
for _, x := range memberList.Members {
|
||||
m[x.Name] = x.ID
|
||||
if x.Name != obj.Hostname {
|
||||
names = append(names, x.Name)
|
||||
}
|
||||
}
|
||||
if len(names) == 0 {
|
||||
return "", nil // can't transfer to self, last remaining host
|
||||
}
|
||||
if len(names) == 1 && names[0] == obj.Hostname { // does this happen?
|
||||
return "", nil // can't transfer to self
|
||||
}
|
||||
sort.Strings(names)
|
||||
if len(names) > 0 {
|
||||
// transfer to alphanumerically lowest ID for consistency...
|
||||
transfereeID = m[names[0]]
|
||||
}
|
||||
|
||||
if transfereeID == 0 { // safety
|
||||
return "", fmt.Errorf("got memberID of zero")
|
||||
}
|
||||
if transfereeID == obj.serverID {
|
||||
return "", nil // can't transfer to self
|
||||
}
|
||||
|
||||
// do the move
|
||||
if _, err := obj.etcd.MoveLeader(ctx, transfereeID); err == rpctypes.ErrNotLeader {
|
||||
if obj.Debug {
|
||||
obj.Logf("we are not the leader...")
|
||||
}
|
||||
return "", nil // we are not the leader
|
||||
} else if err != nil {
|
||||
return "", errwrap.Wrapf(err, "error moving leader")
|
||||
}
|
||||
return names[0], nil
|
||||
}
|
||||
482
etcd/methods.go
482
etcd/methods.go
@@ -18,394 +18,220 @@
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcd "github.com/coreos/etcd/clientv3"
|
||||
rpctypes "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||
etcdutil "github.com/coreos/etcd/clientv3/clientv3util"
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types"
|
||||
context "golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// TODO: Could all these Etcd*(obj *EmbdEtcd, ...) functions which deal with the
|
||||
// interface between etcd paths and behaviour be grouped into a single struct ?
|
||||
|
||||
// Nominate nominates a particular client to be a server (peer).
|
||||
func Nominate(obj *EmbdEtcd, hostname string, urls etcdtypes.URLs) error {
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: Nominate(%v): %v", hostname, urls.String())
|
||||
defer log.Printf("Trace: Etcd: Nominate(%v): Finished!", hostname)
|
||||
// volunteer offers yourself up to be a server if needed. If you specify a nil
|
||||
// value for urls, then this will unvolunteer yourself.
|
||||
func (obj *EmbdEtcd) volunteer(ctx context.Context, urls etcdtypes.URLs) error {
|
||||
if obj.Debug {
|
||||
if urls == nil {
|
||||
obj.Logf("unvolunteer...")
|
||||
defer obj.Logf("unvolunteer: done!")
|
||||
} else {
|
||||
obj.Logf("volunteer: %s", urls.String())
|
||||
defer obj.Logf("volunteer: done!")
|
||||
}
|
||||
// nominate someone to be a server
|
||||
nominate := fmt.Sprintf("%s/nominated/%s", NS, hostname)
|
||||
ops := []etcd.Op{} // list of ops in this txn
|
||||
if urls != nil {
|
||||
ops = append(ops, etcd.OpPut(nominate, urls.String())) // TODO: add a TTL? (etcd.WithLease)
|
||||
|
||||
} else { // delete message if set to erase
|
||||
ops = append(ops, etcd.OpDelete(nominate))
|
||||
}
|
||||
|
||||
if _, err := obj.Txn(nil, ops, nil); err != nil {
|
||||
return fmt.Errorf("nominate failed") // exit in progress?
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Nominated returns a urls map of nominated etcd server volunteers.
|
||||
// NOTE: I know 'nominees' might be more correct, but is less consistent here
|
||||
func Nominated(obj *EmbdEtcd) (etcdtypes.URLsMap, error) {
|
||||
path := fmt.Sprintf("%s/nominated/", NS)
|
||||
keyMap, err := obj.Get(path, etcd.WithPrefix()) // map[string]string, bool
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("nominated isn't available: %v", err)
|
||||
}
|
||||
nominated := make(etcdtypes.URLsMap)
|
||||
for key, val := range keyMap { // loop through directory of nominated
|
||||
if !strings.HasPrefix(key, path) {
|
||||
continue
|
||||
}
|
||||
name := key[len(path):] // get name of nominee
|
||||
if val == "" { // skip "erased" values
|
||||
continue
|
||||
}
|
||||
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("nominated data format error: %v", err)
|
||||
}
|
||||
nominated[name] = urls // add to map
|
||||
if obj.flags.Debug {
|
||||
log.Printf("Etcd: Nominated(%v): %v", name, val)
|
||||
}
|
||||
}
|
||||
return nominated, nil
|
||||
}
|
||||
|
||||
// Volunteer offers yourself up to be a server if needed.
|
||||
func Volunteer(obj *EmbdEtcd, urls etcdtypes.URLs) error {
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: Volunteer(%v): %v", obj.hostname, urls.String())
|
||||
defer log.Printf("Trace: Etcd: Volunteer(%v): Finished!", obj.hostname)
|
||||
}
|
||||
// volunteer to be a server
|
||||
volunteer := fmt.Sprintf("%s/volunteers/%s", NS, obj.hostname)
|
||||
key := fmt.Sprintf(obj.NS+volunteerPathFmt, obj.Hostname)
|
||||
ifs := []etcd.Cmp{} // list matching the desired state
|
||||
ops := []etcd.Op{} // list of ops in this txn
|
||||
els := []etcd.Op{}
|
||||
if urls != nil {
|
||||
// XXX: adding a TTL is crucial! (i think)
|
||||
ops = append(ops, etcd.OpPut(volunteer, urls.String())) // value is usually a peer "serverURL"
|
||||
data := urls.String() // value is usually a peer "serverURL"
|
||||
// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
|
||||
// XXX: reverse things with els to workaround the bug :(
|
||||
//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
|
||||
//ops = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
|
||||
ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
|
||||
ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
|
||||
els = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
|
||||
|
||||
} else { // delete message if set to erase
|
||||
ops = append(ops, etcd.OpDelete(volunteer))
|
||||
ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
|
||||
ops = append(ops, etcd.OpDelete(key))
|
||||
}
|
||||
|
||||
if _, err := obj.Txn(nil, ops, nil); err != nil {
|
||||
return fmt.Errorf("volunteering failed") // exit in progress?
|
||||
_, err := obj.client.Txn(ctx, ifs, ops, els)
|
||||
msg := "volunteering failed"
|
||||
if urls == nil {
|
||||
msg = "unvolunteering failed"
|
||||
}
|
||||
return nil
|
||||
return errwrap.Wrapf(err, msg)
|
||||
}
|
||||
|
||||
// Volunteers returns a urls map of available etcd server volunteers.
|
||||
func Volunteers(obj *EmbdEtcd) (etcdtypes.URLsMap, error) {
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: Volunteers()")
|
||||
defer log.Printf("Trace: Etcd: Volunteers(): Finished!")
|
||||
// nominate nominates a particular client to be a server (peer). If you specify
|
||||
// a nil value for urls, then this will unnominate that member.
|
||||
func (obj *EmbdEtcd) nominate(ctx context.Context, hostname string, urls etcdtypes.URLs) error {
|
||||
if obj.Debug {
|
||||
if urls == nil {
|
||||
obj.Logf("unnominate(%s)...", hostname)
|
||||
defer obj.Logf("unnominate(%s): done!", hostname)
|
||||
} else {
|
||||
obj.Logf("nominate(%s): %s", hostname, urls.String())
|
||||
defer obj.Logf("nominate(%s): done!", hostname)
|
||||
}
|
||||
path := fmt.Sprintf("%s/volunteers/", NS)
|
||||
keyMap, err := obj.Get(path, etcd.WithPrefix())
|
||||
}
|
||||
// nominate someone to be a server
|
||||
key := fmt.Sprintf(obj.NS+nominatedPathFmt, hostname)
|
||||
ifs := []etcd.Cmp{} // list matching the desired state
|
||||
ops := []etcd.Op{} // list of ops in this txn
|
||||
els := []etcd.Op{}
|
||||
if urls != nil {
|
||||
data := urls.String()
|
||||
// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
|
||||
// XXX: reverse things with els to workaround the bug :(
|
||||
//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
|
||||
//ops = append(ops, etcd.OpPut(key, data)) // TODO: add a TTL? (etcd.WithLease)
|
||||
ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
|
||||
els = append(ops, etcd.OpPut(key, data)) // TODO: add a TTL? (etcd.WithLease)
|
||||
|
||||
} else { // delete message if set to erase
|
||||
ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
|
||||
ops = append(ops, etcd.OpDelete(key))
|
||||
}
|
||||
|
||||
_, err := obj.client.Txn(ctx, ifs, ops, els)
|
||||
msg := "nominate failed"
|
||||
if urls == nil {
|
||||
msg = "unnominate failed"
|
||||
}
|
||||
return errwrap.Wrapf(err, msg)
|
||||
}
|
||||
|
||||
// advertise idempotently advertises the list of available client endpoints for
|
||||
// the given member. If you specify a nil value for urls, then this will remove
|
||||
// that member.
|
||||
func (obj *EmbdEtcd) advertise(ctx context.Context, hostname string, urls etcdtypes.URLs) error {
|
||||
if obj.Debug {
|
||||
if urls == nil {
|
||||
obj.Logf("unadvertise(%s)...", hostname)
|
||||
defer obj.Logf("unadvertise(%s): done!", hostname)
|
||||
} else {
|
||||
obj.Logf("advertise(%s): %s", hostname, urls.String())
|
||||
defer obj.Logf("advertise(%s): done!", hostname)
|
||||
}
|
||||
}
|
||||
// advertise endpoints
|
||||
key := fmt.Sprintf(obj.NS+endpointsPathFmt, hostname)
|
||||
ifs := []etcd.Cmp{} // list matching the desired state
|
||||
ops := []etcd.Op{} // list of ops in this txn
|
||||
els := []etcd.Op{}
|
||||
if urls != nil {
|
||||
data := urls.String() // value is usually a "clientURL"
|
||||
// XXX: bug: https://github.com/etcd-io/etcd/issues/10566
|
||||
// XXX: reverse things with els to workaround the bug :(
|
||||
//ifs = append(ifs, etcd.Compare(etcd.Value(key), "!=", data)) // desired state
|
||||
//ops = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
|
||||
ifs = append(ifs, etcd.Compare(etcd.Value(key), "=", data)) // desired state
|
||||
ifs = append(ifs, etcd.Compare(etcd.LeaseValue(key), "=", obj.leaseID))
|
||||
els = append(ops, etcd.OpPut(key, data, etcd.WithLease(obj.leaseID)))
|
||||
} else { // delete in this case
|
||||
ifs = append(ifs, etcdutil.KeyExists(key)) // desired state
|
||||
ops = append(ops, etcd.OpDelete(key))
|
||||
}
|
||||
|
||||
_, err := obj.client.Txn(ctx, ifs, ops, els)
|
||||
msg := "advertising failed"
|
||||
if urls == nil {
|
||||
msg = "unadvertising failed"
|
||||
}
|
||||
return errwrap.Wrapf(err, msg)
|
||||
}
|
||||
|
||||
// getVolunteers returns a urls map of available etcd server volunteers.
|
||||
func (obj *EmbdEtcd) getVolunteers(ctx context.Context) (etcdtypes.URLsMap, error) {
|
||||
if obj.Debug {
|
||||
obj.Logf("getVolunteers()")
|
||||
defer obj.Logf("getVolunteers(): done!")
|
||||
}
|
||||
p := obj.NS + VolunteerPath
|
||||
keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("volunteers aren't available: %v", err)
|
||||
return nil, errwrap.Wrapf(err, "can't get peer volunteers")
|
||||
}
|
||||
volunteers := make(etcdtypes.URLsMap)
|
||||
for key, val := range keyMap { // loop through directory of volunteers
|
||||
if !strings.HasPrefix(key, path) {
|
||||
if !strings.HasPrefix(key, p) {
|
||||
continue
|
||||
}
|
||||
name := key[len(path):] // get name of volunteer
|
||||
name := key[len(p):] // get name of volunteer
|
||||
if val == "" { // skip "erased" values
|
||||
continue
|
||||
}
|
||||
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("volunteers data format error: %v", err)
|
||||
return nil, errwrap.Wrapf(err, "data format error")
|
||||
}
|
||||
volunteers[name] = urls // add to map
|
||||
if obj.flags.Debug {
|
||||
log.Printf("Etcd: Volunteer(%v): %v", name, val)
|
||||
}
|
||||
}
|
||||
return volunteers, nil
|
||||
}
|
||||
|
||||
// AdvertiseEndpoints advertises the list of available client endpoints.
|
||||
func AdvertiseEndpoints(obj *EmbdEtcd, urls etcdtypes.URLs) error {
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: AdvertiseEndpoints(%v): %v", obj.hostname, urls.String())
|
||||
defer log.Printf("Trace: Etcd: AdvertiseEndpoints(%v): Finished!", obj.hostname)
|
||||
// getNominated returns a urls map of nominated etcd server volunteers.
|
||||
// NOTE: I know 'nominees' might be more correct, but is less consistent here
|
||||
func (obj *EmbdEtcd) getNominated(ctx context.Context) (etcdtypes.URLsMap, error) {
|
||||
if obj.Debug {
|
||||
obj.Logf("getNominated()")
|
||||
defer obj.Logf("getNominated(): done!")
|
||||
}
|
||||
// advertise endpoints
|
||||
endpoints := fmt.Sprintf("%s/endpoints/%s", NS, obj.hostname)
|
||||
ops := []etcd.Op{} // list of ops in this txn
|
||||
if urls != nil {
|
||||
// TODO: add a TTL? (etcd.WithLease)
|
||||
ops = append(ops, etcd.OpPut(endpoints, urls.String())) // value is usually a "clientURL"
|
||||
|
||||
} else { // delete message if set to erase
|
||||
ops = append(ops, etcd.OpDelete(endpoints))
|
||||
}
|
||||
|
||||
if _, err := obj.Txn(nil, ops, nil); err != nil {
|
||||
return fmt.Errorf("endpoint advertising failed") // exit in progress?
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Endpoints returns a urls map of available etcd server endpoints.
|
||||
func Endpoints(obj *EmbdEtcd) (etcdtypes.URLsMap, error) {
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: Endpoints()")
|
||||
defer log.Printf("Trace: Etcd: Endpoints(): Finished!")
|
||||
}
|
||||
path := fmt.Sprintf("%s/endpoints/", NS)
|
||||
keyMap, err := obj.Get(path, etcd.WithPrefix())
|
||||
p := obj.NS + NominatedPath
|
||||
keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix()) // map[string]string, bool
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("endpoints aren't available: %v", err)
|
||||
return nil, errwrap.Wrapf(err, "can't get nominated peers")
|
||||
}
|
||||
endpoints := make(etcdtypes.URLsMap)
|
||||
for key, val := range keyMap { // loop through directory of endpoints
|
||||
if !strings.HasPrefix(key, path) {
|
||||
nominated := make(etcdtypes.URLsMap)
|
||||
for key, val := range keyMap { // loop through directory of nominated
|
||||
if !strings.HasPrefix(key, p) {
|
||||
continue
|
||||
}
|
||||
name := key[len(path):] // get name of volunteer
|
||||
name := key[len(p):] // get name of nominee
|
||||
if val == "" { // skip "erased" values
|
||||
continue
|
||||
}
|
||||
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("endpoints data format error: %v", err)
|
||||
return nil, errwrap.Wrapf(err, "data format error")
|
||||
}
|
||||
endpoints[name] = urls // add to map
|
||||
if obj.flags.Debug {
|
||||
log.Printf("Etcd: Endpoint(%v): %v", name, val)
|
||||
nominated[name] = urls // add to map
|
||||
}
|
||||
}
|
||||
return endpoints, nil
|
||||
return nominated, nil
|
||||
}
|
||||
|
||||
// SetHostnameConverged sets whether a specific hostname is converged.
|
||||
func SetHostnameConverged(obj *EmbdEtcd, hostname string, isConverged bool) error {
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: SetHostnameConverged(%s): %v", hostname, isConverged)
|
||||
defer log.Printf("Trace: Etcd: SetHostnameConverged(%v): Finished!", hostname)
|
||||
// getEndpoints returns a urls map of available endpoints for clients.
|
||||
func (obj *EmbdEtcd) getEndpoints(ctx context.Context) (etcdtypes.URLsMap, error) {
|
||||
if obj.Debug {
|
||||
obj.Logf("getEndpoints()")
|
||||
defer obj.Logf("getEndpoints(): done!")
|
||||
}
|
||||
converged := fmt.Sprintf("%s/converged/%s", NS, hostname)
|
||||
op := []etcd.Op{etcd.OpPut(converged, fmt.Sprintf("%t", isConverged))}
|
||||
if _, err := obj.Txn(nil, op, nil); err != nil { // TODO: do we need a skipConv flag here too?
|
||||
return fmt.Errorf("set converged failed") // exit in progress?
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// HostnameConverged returns a map of every hostname's converged state.
|
||||
func HostnameConverged(obj *EmbdEtcd) (map[string]bool, error) {
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: HostnameConverged()")
|
||||
defer log.Printf("Trace: Etcd: HostnameConverged(): Finished!")
|
||||
}
|
||||
path := fmt.Sprintf("%s/converged/", NS)
|
||||
keyMap, err := obj.ComplexGet(path, true, etcd.WithPrefix()) // don't un-converge
|
||||
p := obj.NS + EndpointsPath
|
||||
keyMap, err := obj.client.Get(ctx, p, etcd.WithPrefix())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("converged values aren't available: %v", err)
|
||||
return nil, errwrap.Wrapf(err, "can't get client endpoints")
|
||||
}
|
||||
converged := make(map[string]bool)
|
||||
for key, val := range keyMap { // loop through directory...
|
||||
if !strings.HasPrefix(key, path) {
|
||||
endpoints := make(etcdtypes.URLsMap)
|
||||
for key, val := range keyMap { // loop through directory of endpoints
|
||||
if !strings.HasPrefix(key, p) {
|
||||
continue
|
||||
}
|
||||
name := key[len(path):] // get name of key
|
||||
name := key[len(p):] // get name of volunteer
|
||||
if val == "" { // skip "erased" values
|
||||
continue
|
||||
}
|
||||
b, err := strconv.ParseBool(val)
|
||||
urls, err := etcdtypes.NewURLs(strings.Split(val, ","))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("converged data format error: %v", err)
|
||||
return nil, errwrap.Wrapf(err, "data format error")
|
||||
}
|
||||
converged[name] = b // add to map
|
||||
endpoints[name] = urls // add to map
|
||||
}
|
||||
return converged, nil
|
||||
}
|
||||
|
||||
// AddHostnameConvergedWatcher adds a watcher with a callback that runs on
|
||||
// hostname state changes.
|
||||
func AddHostnameConvergedWatcher(obj *EmbdEtcd, callbackFn func(map[string]bool) error) (func(), error) {
|
||||
path := fmt.Sprintf("%s/converged/", NS)
|
||||
internalCbFn := func(re *RE) error {
|
||||
// TODO: get the value from the response, and apply delta...
|
||||
// for now, just run a get operation which is easier to code!
|
||||
m, err := HostnameConverged(obj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return callbackFn(m) // call my function
|
||||
}
|
||||
return obj.AddWatcher(path, internalCbFn, true, true, etcd.WithPrefix()) // no block and no converger reset
|
||||
}
|
||||
|
||||
// SetClusterSize sets the ideal target cluster size of etcd peers.
|
||||
func SetClusterSize(obj *EmbdEtcd, value uint16) error {
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: SetClusterSize(): %v", value)
|
||||
defer log.Printf("Trace: Etcd: SetClusterSize(): Finished!")
|
||||
}
|
||||
key := fmt.Sprintf("%s/idealClusterSize", NS)
|
||||
|
||||
if err := obj.Set(key, strconv.FormatUint(uint64(value), 10)); err != nil {
|
||||
return fmt.Errorf("function SetClusterSize failed: %v", err) // exit in progress?
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetClusterSize gets the ideal target cluster size of etcd peers.
|
||||
func GetClusterSize(obj *EmbdEtcd) (uint16, error) {
|
||||
key := fmt.Sprintf("%s/idealClusterSize", NS)
|
||||
keyMap, err := obj.Get(key)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("function GetClusterSize failed: %v", err)
|
||||
}
|
||||
|
||||
val, exists := keyMap[key]
|
||||
if !exists || val == "" {
|
||||
return 0, fmt.Errorf("function GetClusterSize failed: %v", err)
|
||||
}
|
||||
|
||||
v, err := strconv.ParseUint(val, 10, 16)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("function GetClusterSize failed: %v", err)
|
||||
}
|
||||
return uint16(v), nil
|
||||
}
|
||||
|
||||
// MemberAdd adds a member to the cluster.
|
||||
func MemberAdd(obj *EmbdEtcd, peerURLs etcdtypes.URLs) (*etcd.MemberAddResponse, error) {
|
||||
//obj.Connect(false) // TODO: ?
|
||||
ctx := context.Background()
|
||||
var response *etcd.MemberAddResponse
|
||||
var err error
|
||||
for {
|
||||
if obj.exiting { // the exit signal has been sent!
|
||||
return nil, fmt.Errorf("exiting etcd")
|
||||
}
|
||||
obj.rLock.RLock()
|
||||
response, err = obj.client.MemberAdd(ctx, peerURLs.StringSlice())
|
||||
obj.rLock.RUnlock()
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
if ctx, err = obj.CtxError(ctx, err); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return response, nil
|
||||
}
|
||||
|
||||
// MemberRemove removes a member by mID and returns if it worked, and also
|
||||
// if there was an error. This is because it might have run without error, but
|
||||
// the member wasn't found, for example.
|
||||
func MemberRemove(obj *EmbdEtcd, mID uint64) (bool, error) {
|
||||
//obj.Connect(false) // TODO: ?
|
||||
ctx := context.Background()
|
||||
for {
|
||||
if obj.exiting { // the exit signal has been sent!
|
||||
return false, fmt.Errorf("exiting etcd")
|
||||
}
|
||||
obj.rLock.RLock()
|
||||
_, err := obj.client.MemberRemove(ctx, mID)
|
||||
obj.rLock.RUnlock()
|
||||
if err == nil {
|
||||
break
|
||||
} else if err == rpctypes.ErrMemberNotFound {
|
||||
// if we get this, member already shut itself down :)
|
||||
return false, nil
|
||||
}
|
||||
if ctx, err = obj.CtxError(ctx, err); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Members returns information on cluster membership.
|
||||
// The member ID's are the keys, because an empty names means unstarted!
|
||||
// TODO: consider queueing this through the main loop with CtxError(ctx, err)
|
||||
func Members(obj *EmbdEtcd) (map[uint64]string, error) {
|
||||
//obj.Connect(false) // TODO: ?
|
||||
ctx := context.Background()
|
||||
var response *etcd.MemberListResponse
|
||||
var err error
|
||||
for {
|
||||
if obj.exiting { // the exit signal has been sent!
|
||||
return nil, fmt.Errorf("exiting etcd")
|
||||
}
|
||||
obj.rLock.RLock()
|
||||
if obj.flags.Trace {
|
||||
log.Printf("Trace: Etcd: Members(): Endpoints are: %v", obj.client.Endpoints())
|
||||
}
|
||||
response, err = obj.client.MemberList(ctx)
|
||||
obj.rLock.RUnlock()
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
if ctx, err = obj.CtxError(ctx, err); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
members := make(map[uint64]string)
|
||||
for _, x := range response.Members {
|
||||
members[x.ID] = x.Name // x.Name will be "" if unstarted!
|
||||
}
|
||||
return members, nil
|
||||
}
|
||||
|
||||
// Leader returns the current leader of the etcd server cluster.
|
||||
func Leader(obj *EmbdEtcd) (string, error) {
|
||||
//obj.Connect(false) // TODO: ?
|
||||
membersMap, err := Members(obj)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
addresses := obj.LocalhostClientURLs() // heuristic, but probably correct
|
||||
if len(addresses) == 0 {
|
||||
// probably a programming error...
|
||||
return "", fmt.Errorf("programming error")
|
||||
}
|
||||
endpoint := addresses[0].Host // FIXME: arbitrarily picked the first one
|
||||
|
||||
// part two
|
||||
ctx := context.Background()
|
||||
var response *etcd.StatusResponse
|
||||
for {
|
||||
if obj.exiting { // the exit signal has been sent!
|
||||
return "", fmt.Errorf("exiting etcd")
|
||||
}
|
||||
|
||||
obj.rLock.RLock()
|
||||
response, err = obj.client.Maintenance.Status(ctx, endpoint)
|
||||
obj.rLock.RUnlock()
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
if ctx, err = obj.CtxError(ctx, err); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
// isLeader: response.Header.MemberId == response.Leader
|
||||
for id, name := range membersMap {
|
||||
if id == response.Leader {
|
||||
return name, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("members map is not current") // not found
|
||||
return endpoints, nil
|
||||
}
|
||||
|
||||
309
etcd/server.go
Normal file
309
etcd/server.go
Normal file
@@ -0,0 +1,309 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
"github.com/coreos/etcd/embed"
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types"
|
||||
)
|
||||
|
||||
const (
	// MaxServerStartTimeout is how long we wait for the server to start
	// before we consider the start a failure. If you hit this timeout, let
	// us know so that we can analyze the situation, and increase this if
	// necessary.
	MaxServerStartTimeout = time.Minute

	// MaxServerCloseTimeout is the longest we'll wait for the server to
	// close down. If this is exceeded, it's probably a bug.
	MaxServerCloseTimeout = 15 * time.Second

	// MaxServerRetries is how many times we may try to restart the server
	// if it fails on startup. This can help workaround some timing bugs in
	// etcd.
	MaxServerRetries = 5

	// ServerRetryWait is the pause between two of those retries.
	ServerRetryWait = time.Second / 2
)
|
||||
|
||||
// serverAction is the state that we would like the server to move into.
type serverAction uint8

const (
	// serverActionStop asks whether the server should be stopped.
	serverActionStop serverAction = 0
	// serverActionStart asks whether the server should be started.
	serverActionStart serverAction = 1
)
|
||||
|
||||
// serverAction returns whether we should do the action requested. The action is
|
||||
// either start (true) or stop (false) as input. For example, if we run this as:
|
||||
// true -> true, it means we asked if we should start, and the answer is yes.
|
||||
func (obj *EmbdEtcd) serverAction(action serverAction) bool {
|
||||
// check if i have actually volunteered first of all...
|
||||
if obj.NoServer || len(obj.ServerURLs) == 0 {
|
||||
obj.Logf("inappropriately nominated, rogue or stale server?")
|
||||
return false // no action
|
||||
}
|
||||
|
||||
_, exists := obj.nominated[obj.Hostname] // am i nominated?
|
||||
|
||||
// if there are no other peers, we create a new server
|
||||
// TODO: do we need an || len(obj.nominated) == 0 if we're the first?
|
||||
newCluster := len(obj.nominated) == 1 && exists
|
||||
|
||||
switch action {
|
||||
case serverActionStart:
|
||||
// we start if...
|
||||
return obj.server == nil && (exists || newCluster)
|
||||
|
||||
case serverActionStop:
|
||||
// we stop if...
|
||||
return obj.server != nil && !exists
|
||||
}
|
||||
|
||||
return false // no action needed
|
||||
}
|
||||
|
||||
// runServer kicks of a new embedded etcd server. It exits when the server shuts
|
||||
// down. The exit can be triggered at any time by running destroyServer or if it
|
||||
// exits due to some condition like an error.
|
||||
// FIXME: should peerURLsMap just use obj.nominated instead?
|
||||
func (obj *EmbdEtcd) runServer(newCluster bool, peerURLsMap etcdtypes.URLsMap) (reterr error) {
|
||||
obj.Logf("server: runServer: (newCluster=%t): %+v", newCluster, peerURLsMap)
|
||||
defer obj.Logf("server: runServer: done!")
|
||||
//obj.serverwg.Wait() // bonus, but instead, a mutex would be race free!
|
||||
obj.serverwg.Add(1)
|
||||
defer obj.serverwg.Done()
|
||||
defer obj.serverExitsSignal.Send()
|
||||
dataDir := fmt.Sprintf("%s/", path.Join(obj.Prefix, "server"))
|
||||
if err := os.MkdirAll(dataDir, 0770); err != nil {
|
||||
return errwrap.Wrapf(err, "couldn't mkdir: %s", dataDir)
|
||||
}
|
||||
|
||||
memberName := obj.Hostname
|
||||
|
||||
// if no peer URLs exist, then starting a server is mostly only for some
|
||||
// testing, but etcd doesn't allow the value to be empty so we use this!
|
||||
peerURLs, err := etcdtypes.NewURLs([]string{"http://localhost:0"})
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "invalid URLs")
|
||||
}
|
||||
if len(obj.ServerURLs) > 0 {
|
||||
peerURLs = obj.ServerURLs
|
||||
}
|
||||
initialPeerURLsMap, err := copyURLsMap(peerURLsMap)
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "error copying URLsMap")
|
||||
}
|
||||
// add self to list if it's not already in there...
|
||||
if _, exists := peerURLsMap[memberName]; !exists {
|
||||
initialPeerURLsMap[memberName] = peerURLs
|
||||
}
|
||||
|
||||
// TODO: do we need to copy?
|
||||
aPUrls := peerURLs
|
||||
if len(obj.AServerURLs) > 0 {
|
||||
aPUrls = obj.AServerURLs
|
||||
}
|
||||
// NOTE: this logic is similar to obj.curls()
|
||||
aCUrls := obj.ClientURLs
|
||||
if len(obj.AClientURLs) > 0 {
|
||||
aCUrls = obj.AClientURLs
|
||||
}
|
||||
|
||||
// embed etcd
|
||||
cfg := embed.NewConfig()
|
||||
cfg.Name = memberName // hostname
|
||||
cfg.Dir = dataDir
|
||||
cfg.LPUrls = peerURLs
|
||||
cfg.LCUrls = obj.ClientURLs
|
||||
cfg.APUrls = aPUrls
|
||||
cfg.ACUrls = aCUrls
|
||||
cfg.StrictReconfigCheck = false // XXX: workaround https://github.com/coreos/etcd/issues/6305
|
||||
cfg.MaxTxnOps = DefaultMaxTxnOps
|
||||
|
||||
cfg.InitialCluster = initialPeerURLsMap.String() // including myself!
|
||||
if newCluster {
|
||||
cfg.ClusterState = embed.ClusterStateFlagNew
|
||||
} else {
|
||||
cfg.ClusterState = embed.ClusterStateFlagExisting
|
||||
}
|
||||
//cfg.ForceNewCluster = newCluster // TODO: ?
|
||||
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return errwrap.Wrapf(err, "server config is invalid")
|
||||
}
|
||||
|
||||
obj.Logf("server: starting...")
|
||||
// TODO: etcd panics with: `create wal error: no space left on device`
|
||||
// see: https://github.com/etcd-io/etcd/issues/10588
|
||||
defer func() {
|
||||
if r := recover(); r != nil { // magic panic catcher
|
||||
obj.Logf("server: panic: %s", r)
|
||||
reterr = fmt.Errorf("panic during start with: %s", r) // set named return err
|
||||
}
|
||||
}()
|
||||
// XXX: workaround: https://github.com/etcd-io/etcd/issues/10626
|
||||
// This runs when we see the nominate operation. This could also error
|
||||
// if this races to start up, and happens before the member add runs.
|
||||
count := 0
|
||||
for {
|
||||
obj.server, err = embed.StartEtcd(cfg)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
e := err.Error()
|
||||
// catch: error validating peerURLs ... member count is unequal
|
||||
if strings.HasPrefix(e, "error validating peerURLs") && strings.HasSuffix(e, "member count is unequal") {
|
||||
count++
|
||||
if count > MaxServerRetries {
|
||||
err = errwrap.Wrapf(err, "workaround retries (%d) exceeded", MaxServerRetries)
|
||||
break
|
||||
}
|
||||
obj.Logf("waiting %s for retry", ServerRetryWait.String())
|
||||
time.Sleep(ServerRetryWait)
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
defer func() {
|
||||
obj.server = nil // important because this is used as an isRunning flag
|
||||
}()
|
||||
if err != nil {
|
||||
// early debug logs in case something downstream blocks
|
||||
if obj.Debug {
|
||||
obj.Logf("server failing with: %+v", err)
|
||||
}
|
||||
return errwrap.Wrapf(err, "server start failed")
|
||||
}
|
||||
|
||||
closedChan := make(chan struct{})
|
||||
defer func() {
|
||||
select {
|
||||
case <-time.After(MaxServerCloseTimeout):
|
||||
obj.Logf("server: close timeout of %s reached", MaxServerCloseTimeout.String())
|
||||
case <-closedChan:
|
||||
}
|
||||
}()
|
||||
defer func() {
|
||||
// no wg here, since we want to let it die on exit if need be...
|
||||
// XXX: workaround: https://github.com/etcd-io/etcd/issues/10600
|
||||
go func() {
|
||||
obj.server.Close() // this blocks until server has stopped
|
||||
close(closedChan) // woo!
|
||||
}()
|
||||
}()
|
||||
defer obj.server.Server.Stop() // trigger a shutdown
|
||||
|
||||
select {
|
||||
case <-obj.server.Server.ReadyNotify(): // we hang here if things are bad
|
||||
obj.Logf("server: ready") // it didn't hang!
|
||||
|
||||
// TODO: should we wait for this notification elsewhere?
|
||||
case <-obj.server.Server.StopNotify(): // it's going down now...
|
||||
err := fmt.Errorf("received stop notification")
|
||||
obj.Logf("server: stopped: %v", err)
|
||||
return err
|
||||
|
||||
case <-time.After(MaxServerStartTimeout):
|
||||
err := fmt.Errorf("start timeout of %s reached", MaxServerStartTimeout.String())
|
||||
obj.Logf("server: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
obj.serverID = uint64(obj.server.Server.ID()) // store member id for internal use
|
||||
defer func() {
|
||||
obj.serverID = 0 // reset
|
||||
}()
|
||||
obj.addSelfState() // add to endpoints list so self client can connect!
|
||||
obj.setEndpoints() // sync client with new endpoints
|
||||
defer obj.setEndpoints()
|
||||
defer obj.rmMemberState(obj.Hostname)
|
||||
|
||||
obj.serverReadySignal.Send() // send a signal, and then reset the signal
|
||||
|
||||
for {
|
||||
select {
|
||||
case err, ok := <-obj.server.Err():
|
||||
if !ok { // server shut down
|
||||
return errwrap.Wrapf(err, "server shutdown error")
|
||||
}
|
||||
|
||||
case <-obj.serverExit.Signal():
|
||||
return errwrap.Wrapf(obj.serverExit.Error(), "server signal exit")
|
||||
}
|
||||
}
|
||||
|
||||
//return nil // unreachable
|
||||
}
|
||||
|
||||
// destroyServer shuts down the embedded etcd server portion.
|
||||
func (obj *EmbdEtcd) destroyServer() error {
|
||||
// This function must be thread-safe because a destroy request will
|
||||
// cause runServer to return, which then runs the defer of this function
|
||||
// which is meant to clean up when an independent, normal runServer
|
||||
// return happens. Add the mutex to protect against races on this call.
|
||||
obj.servermu.Lock()
|
||||
defer obj.servermu.Unlock()
|
||||
if obj.server == nil {
|
||||
return nil // don't error on redundant calls
|
||||
}
|
||||
obj.Logf("server: destroyServer...")
|
||||
defer obj.Logf("server: destroyServer: done!")
|
||||
|
||||
obj.serverExit.Done(nil) // trigger an exit
|
||||
|
||||
obj.serverwg.Wait() // wait for server to finish shutting down
|
||||
defer func() {
|
||||
obj.serverExit = util.NewEasyExit() // reset
|
||||
}()
|
||||
return obj.serverExit.Error()
|
||||
}
|
||||
|
||||
// ServerReady returns a channel that closes when we're up and running. This
|
||||
// process happens when calling runServer. If runServer is never called, this
|
||||
// will never happen. It also returns a cancel/ack function which must be called
|
||||
// once the signal is received or we are done watching it. This is because this
|
||||
// is a cyclical signal which happens, and then gets reset as the server starts
|
||||
// up, shuts down, and repeats the cycle. The cancel/ack function ensures that
|
||||
// we only watch a signal when it's ready to be read, and only reset it when we
|
||||
// are done watching it.
|
||||
func (obj *EmbdEtcd) ServerReady() (<-chan struct{}, func()) {
|
||||
return obj.serverReadySignal.Subscribe()
|
||||
}
|
||||
|
||||
// ServerExited returns a channel that closes when the server is destroyed. This
|
||||
// process happens after runServer exits. If runServer is never called, this
|
||||
// will never happen. It also returns a cancel/ack function which must be called
|
||||
// once the signal is received or we are done watching it. This is because this
|
||||
// is a cyclical signal which happens, and then gets reset as the server starts
|
||||
// up, shuts down, and repeats the cycle. The cancel/ack function ensures that
|
||||
// we only watch a signal when it's ready to be read, and only reset it when we
|
||||
// are done watching it.
|
||||
func (obj *EmbdEtcd) ServerExited() (<-chan struct{}, func()) {
|
||||
return obj.serverExitsSignal.Subscribe()
|
||||
}
|
||||
163
etcd/tasks.go
Normal file
163
etcd/tasks.go
Normal file
@@ -0,0 +1,163 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
)
|
||||
|
||||
// task is one unit of pending work for the task queue. It is meant for work
// that we want to schedule, but that shouldn't permanently error the system if
// it fails. Idempotent tasks that are safe to repeat are the ideal fit. Tasks
// are added with queueTask.
type task struct {
	// name is the name of the task.
	name string
	// fn is the work to run.
	fn func() error
	// retry is the number of times to retry on error, -1 for infinite.
	retry int
	// block says if the queue should be blocked until this succeeds.
	block bool
	// report says if the error should be reported on permanent failure.
	report bool
}

// String returns a short, human readable representation of the task.
func (obj *task) String() string {
	name := obj.name
	return fmt.Sprintf("task(%s)", name)
}
|
||||
|
||||
// queueTask adds a task to the task worker queue. If you want to specify any
|
||||
// properties that differ from the defaults, use queueRawTask instead.
|
||||
func (obj *EmbdEtcd) queueTask(fn func() error) error {
|
||||
obj.taskQueueLock.Lock()
|
||||
obj.taskQueueLock.Unlock()
|
||||
t := &task{
|
||||
fn: fn,
|
||||
}
|
||||
return obj.queueRawTask(t)
|
||||
}
|
||||
|
||||
// queueRawTask adds a task of any format to the queue. You should not name your
|
||||
// task a string which could match a positive integer. Those names are used when
|
||||
// an unnamed task is specified and the system needs to generate a name.
|
||||
func (obj *EmbdEtcd) queueRawTask(t *task) error {
|
||||
if obj.Debug {
|
||||
obj.Logf("queueRawTask()")
|
||||
defer obj.Logf("queueRawTask(): done!")
|
||||
}
|
||||
|
||||
if t == nil {
|
||||
return fmt.Errorf("nil task")
|
||||
}
|
||||
|
||||
obj.taskQueueLock.Lock()
|
||||
defer obj.taskQueueLock.Unlock()
|
||||
if obj.taskQueue == nil { // killed signal
|
||||
return fmt.Errorf("task queue killed")
|
||||
}
|
||||
if t.name == "" {
|
||||
obj.taskQueueID++ // increment
|
||||
t.name = fmt.Sprintf("%d", obj.taskQueueID)
|
||||
}
|
||||
|
||||
obj.taskQueue = append(obj.taskQueue, t)
|
||||
if !obj.taskQueueRunning {
|
||||
obj.taskQueueRunning = true
|
||||
obj.taskQueueWg.Add(1)
|
||||
go obj.runTaskQueue()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// killTaskQueue empties the task queue, causing it to shutdown.
|
||||
func (obj *EmbdEtcd) killTaskQueue() int {
|
||||
obj.taskQueueLock.Lock()
|
||||
count := len(obj.taskQueue)
|
||||
obj.taskQueue = nil // clear queue
|
||||
obj.taskQueueLock.Unlock()
|
||||
|
||||
obj.taskQueueWg.Wait() // wait for queue to exit
|
||||
obj.taskQueue = []*task{} // reset
|
||||
return count // number of tasks deleted
|
||||
}
|
||||
|
||||
// runTaskQueue processes the task queue. This is started automatically by
|
||||
// queueTask if needed. It will shut itself down when the queue is empty.
|
||||
func (obj *EmbdEtcd) runTaskQueue() {
|
||||
defer obj.taskQueueWg.Done() // added in queueTask
|
||||
for {
|
||||
obj.taskQueueLock.Lock()
|
||||
if obj.taskQueue == nil || len(obj.taskQueue) == 0 {
|
||||
defer obj.taskQueueLock.Unlock()
|
||||
obj.taskQueueRunning = false
|
||||
return
|
||||
}
|
||||
var t *task
|
||||
t, obj.taskQueue = obj.taskQueue[0], obj.taskQueue[1:]
|
||||
obj.taskQueueLock.Unlock()
|
||||
|
||||
if !t.block {
|
||||
if obj.Debug {
|
||||
obj.Logf("%s: run...", t)
|
||||
}
|
||||
err := t.fn()
|
||||
if obj.Debug {
|
||||
obj.Logf("%s: done: %v", t, err)
|
||||
}
|
||||
if err != nil {
|
||||
if t.retry == 0 {
|
||||
if t.report {
|
||||
// send a permanent error
|
||||
// XXX: guard errChan for early close... hmmm
|
||||
select {
|
||||
case obj.errChan <- errwrap.Wrapf(err, "task error"):
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
if t.retry > 0 { // don't decrement from -1
|
||||
t.retry--
|
||||
}
|
||||
obj.taskQueueLock.Lock()
|
||||
if obj.taskQueue != nil { // killed signal
|
||||
obj.taskQueue = append(obj.taskQueue, t)
|
||||
}
|
||||
obj.taskQueueLock.Unlock()
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// block
|
||||
for {
|
||||
if obj.Debug {
|
||||
obj.Logf("%s: run...", t)
|
||||
}
|
||||
err := t.fn()
|
||||
if obj.Debug {
|
||||
obj.Logf("%s: done: %v", t, err)
|
||||
}
|
||||
if err != nil {
|
||||
if t.retry == 0 {
|
||||
break
|
||||
}
|
||||
if t.retry > 0 { // don't decrement from -1
|
||||
t.retry--
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
173
etcd/util.go
Normal file
173
etcd/util.go
Normal file
@@ -0,0 +1,173 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package etcd
|
||||
|
||||
// TODO: move to sub-package if this expands in utility or is used elsewhere...
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types"
|
||||
)
|
||||
|
||||
// copyURL returns an independent copy of u, which is made by parsing its string
// representation again. A nil input is an error.
// TODO: submit this upstream to etcd ?
func copyURL(u *url.URL) (*url.URL, error) {
	if u != nil {
		return url.Parse(u.String()) // copy it
	}
	return nil, fmt.Errorf("empty URL specified")
}
|
||||
|
||||
// copyURLs copies a URLs.
|
||||
// TODO: submit this upstream to etcd ?
|
||||
func copyURLs(urls etcdtypes.URLs) (etcdtypes.URLs, error) {
|
||||
out := []url.URL{}
|
||||
for _, x := range urls {
|
||||
u, err := copyURL(&x)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, *u)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// copyURLsMap copies a URLsMap.
|
||||
// TODO: submit this upstream to etcd ?
|
||||
func copyURLsMap(urlsMap etcdtypes.URLsMap) (etcdtypes.URLsMap, error) {
|
||||
out := make(etcdtypes.URLsMap)
|
||||
for k, v := range urlsMap {
|
||||
urls, err := copyURLs(v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out[k] = urls
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// cmpURLs compares two URLs, and returns nil if they are the same.
|
||||
func cmpURLs(u1, u2 etcdtypes.URLs) error {
|
||||
if (u1 == nil) != (u2 == nil) { // xor
|
||||
return fmt.Errorf("lists differ")
|
||||
}
|
||||
if len(u1) != len(u2) {
|
||||
return fmt.Errorf("length of lists is not the same")
|
||||
}
|
||||
|
||||
for i, v1 := range u1 {
|
||||
if v1 != u2[i] {
|
||||
return fmt.Errorf("index %d differs", i)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cmpURLsMap compares two URLsMap's, and returns nil if they are the same.
|
||||
func cmpURLsMap(m1, m2 etcdtypes.URLsMap) error {
|
||||
if (m1 == nil) != (m2 == nil) { // xor
|
||||
return fmt.Errorf("maps differ")
|
||||
}
|
||||
if len(m1) != len(m2) {
|
||||
return fmt.Errorf("length of maps is not the same")
|
||||
}
|
||||
|
||||
for k, v1 := range m1 {
|
||||
v2, exists := m2[k]
|
||||
if !exists {
|
||||
return fmt.Errorf("key `%s` not found in map 2", k)
|
||||
}
|
||||
if err := cmpURLs(v1, v2); err != nil {
|
||||
return errwrap.Wrapf(err, "values at key `%s` differ", k)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// newURLsMap is a helper to build a new URLsMap without having to import the
|
||||
// messy etcdtypes package.
|
||||
func newURLsMap() etcdtypes.URLsMap {
|
||||
return make(etcdtypes.URLsMap)
|
||||
}
|
||||
|
||||
func fromURLsToStringList(urls etcdtypes.URLs) []string {
|
||||
result := []string{}
|
||||
for _, u := range urls { // flatten map
|
||||
result = append(result, u.String()) // use full url including scheme
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// fromURLsMapToStringList flattens a map of URLs into a single string list.
|
||||
// Remember to sort the result if you want it to be deterministic!
|
||||
func fromURLsMapToStringList(m etcdtypes.URLsMap) []string {
|
||||
result := []string{}
|
||||
for _, x := range m { // flatten map
|
||||
for _, u := range x {
|
||||
result = append(result, u.String()) // use full url including scheme
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// validateURLsMap checks if each embedded URL is parseable correctly.
|
||||
//func validateURLsMap(urlsMap etcdtypes.URLsMap) error {
|
||||
// _, err := copyURLsMap(urlsMap) // would fail if anything didn't parse
|
||||
// return err
|
||||
//}
|
||||
|
||||
// localhostURLs returns the most localhost like URLs for direct connection.
|
||||
// This gets clients to talk to the local servers first before looking remotely.
|
||||
// TODO: improve this algorithm as it's currently a bad heuristic
|
||||
func localhostURLs(urls etcdtypes.URLs) etcdtypes.URLs {
|
||||
out := etcdtypes.URLs{}
|
||||
for _, u := range urls {
|
||||
// "localhost" or anything in 127.0.0.0/8 is valid!
|
||||
if strings.HasPrefix(u.Host, "localhost") || strings.HasPrefix(u.Host, "127.") {
|
||||
out = append(out, u)
|
||||
continue
|
||||
}
|
||||
// or ipv6 localhost
|
||||
// TODO: are there others to add here?
|
||||
if strings.HasPrefix(u.Host, "[::1]") {
|
||||
out = append(out, u)
|
||||
continue
|
||||
}
|
||||
// or local unix domain sockets
|
||||
if u.Scheme == "unix" {
|
||||
out = append(out, u)
|
||||
continue
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
//func urlRemoveScheme(urls etcdtypes.URLs) []string {
|
||||
// strs := []string{}
|
||||
// for _, u := range urls {
|
||||
// strs = append(strs, u.Host) // remove http:// prefix
|
||||
// }
|
||||
// return strs
|
||||
//}
|
||||
189
etcd/util_test.go
Normal file
189
etcd/util_test.go
Normal file
@@ -0,0 +1,189 @@
|
||||
// Mgmt
|
||||
// Copyright (C) 2013-2019+ James Shubin and the project contributors
|
||||
// Written by James Shubin <james@shubin.ca> and the project contributors
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// +build !root
|
||||
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCopyURL0(t *testing.T) {
|
||||
// list of urls to test
|
||||
strs := []string{
|
||||
"",
|
||||
"http://192.168.13.42:2379",
|
||||
"https://192.168.13.42:2380",
|
||||
"http://192.168.13.42",
|
||||
"https://192.168.13.42",
|
||||
}
|
||||
for _, str := range strs {
|
||||
t.Logf("testing: `%s`", str)
|
||||
u1, err := url.Parse(str)
|
||||
if err != nil {
|
||||
t.Errorf("url did not parse: %+v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
u2, err := copyURL(u1)
|
||||
if err != nil {
|
||||
t.Errorf("url did not copy: %+v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if s := u2.String(); s != str {
|
||||
t.Errorf("url did not cmp, got: `%s`, expected: `%s`", s, str)
|
||||
}
|
||||
|
||||
// bonus test (add to separate lists of size one)
|
||||
if err := cmpURLs([]url.URL{*u1}, []url.URL{*u2}); err != nil {
|
||||
t.Errorf("urls did not cmp, err: %+v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCopyURLs0(t *testing.T) {
|
||||
// list of urls lists to test
|
||||
nstrs := [][]string{
|
||||
{}, // empty!
|
||||
{
|
||||
"http://192.168.13.42:2379",
|
||||
"https://192.168.13.42:2380",
|
||||
"http://192.168.13.42",
|
||||
"https://192.168.13.42",
|
||||
},
|
||||
{
|
||||
"http://192.168.42.42:2379",
|
||||
"https://192.168.13.42:2380",
|
||||
"http://192.168.99.42",
|
||||
"https://10.10.1.255",
|
||||
},
|
||||
{
|
||||
"http://example.com:2379",
|
||||
"https://purpleidea.com/:2379",
|
||||
"http://192.168.13.42",
|
||||
"https://192.168.13.42",
|
||||
},
|
||||
}
|
||||
for _, strs := range nstrs {
|
||||
t.Logf("testing: `%s`", strs)
|
||||
|
||||
urls1 := []url.URL{}
|
||||
for _, str := range strs {
|
||||
u, err := url.Parse(str)
|
||||
if err != nil {
|
||||
t.Errorf("url did not parse: %+v", err)
|
||||
continue
|
||||
}
|
||||
urls1 = append(urls1, *u)
|
||||
}
|
||||
|
||||
urls2, err := copyURLs(urls1)
|
||||
if err != nil {
|
||||
t.Errorf("urls did not copy: %+v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := cmpURLs(urls1, urls2); err != nil {
|
||||
t.Errorf("urls did not cmp, err: %+v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCopyURLsMap0(t *testing.T) {
|
||||
// list of urls lists to test
|
||||
nmstrs := []map[string][]string{
|
||||
{}, // empty!
|
||||
{
|
||||
"h1": []string{}, // empty
|
||||
"h2": []string{}, // empty
|
||||
"h3": []string{}, // empty
|
||||
},
|
||||
{
|
||||
"h1": []string{}, // empty
|
||||
"h2": nil, // nil !
|
||||
"h3": []string{}, // empty
|
||||
},
|
||||
{
|
||||
"h1": []string{}, // empty
|
||||
"h2": []string{
|
||||
"http://example.com:2379",
|
||||
"https://purpleidea.com/:2379",
|
||||
"http://192.168.13.42",
|
||||
"https://192.168.13.42",
|
||||
},
|
||||
},
|
||||
{
|
||||
"h1": []string{
|
||||
"http://192.168.13.42:2379",
|
||||
"https://192.168.13.42:2380",
|
||||
"http://192.168.13.42",
|
||||
"https://192.168.13.42",
|
||||
},
|
||||
"h2": []string{
|
||||
"http://example.com:2379",
|
||||
"https://purpleidea.com/:2379",
|
||||
"http://192.168.13.42",
|
||||
"https://192.168.13.42",
|
||||
},
|
||||
},
|
||||
{
|
||||
"h1": []string{
|
||||
"http://192.168.13.42:2379",
|
||||
"https://192.168.13.42:2380",
|
||||
"http://192.168.13.42",
|
||||
"https://192.168.13.42",
|
||||
},
|
||||
"h2": nil, // nil !
|
||||
"h3": []string{
|
||||
"http://example.com:2379",
|
||||
"https://purpleidea.com/:2379",
|
||||
"http://192.168.13.42",
|
||||
"https://192.168.13.42",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, mstrs := range nmstrs {
|
||||
t.Logf("testing: `%s`", mstrs)
|
||||
urlsMap1 := newURLsMap()
|
||||
for key, strs := range mstrs {
|
||||
urls := []url.URL{}
|
||||
for _, str := range strs {
|
||||
u, err := url.Parse(str)
|
||||
if err != nil {
|
||||
t.Errorf("url did not parse: %+v", err)
|
||||
continue
|
||||
}
|
||||
urls = append(urls, *u)
|
||||
}
|
||||
urlsMap1[key] = urls
|
||||
}
|
||||
|
||||
urlsMap2, err := copyURLsMap(urlsMap1)
|
||||
if err != nil {
|
||||
t.Errorf("urlsMap did not copy: %+v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := cmpURLsMap(urlsMap1, urlsMap2); err != nil {
|
||||
t.Errorf("urlsMap did not cmp, err: %+v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
107
etcd/world.go
107
etcd/world.go
@@ -18,19 +18,27 @@
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/purpleidea/mgmt/engine"
|
||||
"github.com/purpleidea/mgmt/etcd/chooser"
|
||||
"github.com/purpleidea/mgmt/etcd/client"
|
||||
"github.com/purpleidea/mgmt/etcd/client/resources"
|
||||
"github.com/purpleidea/mgmt/etcd/client/str"
|
||||
"github.com/purpleidea/mgmt/etcd/client/strmap"
|
||||
etcdfs "github.com/purpleidea/mgmt/etcd/fs"
|
||||
"github.com/purpleidea/mgmt/etcd/interfaces"
|
||||
"github.com/purpleidea/mgmt/etcd/scheduler"
|
||||
"github.com/purpleidea/mgmt/util"
|
||||
)
|
||||
|
||||
// World is an etcd backed implementation of the World interface.
|
||||
type World struct {
|
||||
Hostname string // uuid for the consumer of these
|
||||
EmbdEtcd *EmbdEtcd
|
||||
Client interfaces.Client
|
||||
MetadataPrefix string // expected metadata prefix
|
||||
StoragePrefix string // storage prefix for etcdfs storage
|
||||
StandaloneFs engine.Fs // store an fs here for local usage
|
||||
@@ -40,72 +48,113 @@ type World struct {
|
||||
|
||||
// ResWatch returns a channel which spits out events on possible exported
|
||||
// resource changes.
|
||||
func (obj *World) ResWatch() chan error {
|
||||
return WatchResources(obj.EmbdEtcd)
|
||||
func (obj *World) ResWatch(ctx context.Context) (chan error, error) {
|
||||
return resources.WatchResources(ctx, obj.Client)
|
||||
}
|
||||
|
||||
// ResExport exports a list of resources under our hostname namespace.
|
||||
// Subsequent calls replace the previously set collection atomically.
|
||||
func (obj *World) ResExport(resourceList []engine.Res) error {
|
||||
return SetResources(obj.EmbdEtcd, obj.Hostname, resourceList)
|
||||
func (obj *World) ResExport(ctx context.Context, resourceList []engine.Res) error {
|
||||
return resources.SetResources(ctx, obj.Client, obj.Hostname, resourceList)
|
||||
}
|
||||
|
||||
// ResCollect gets the collection of exported resources which match the filter.
|
||||
// It does this atomically so that a call always returns a complete collection.
|
||||
func (obj *World) ResCollect(hostnameFilter, kindFilter []string) ([]engine.Res, error) {
|
||||
func (obj *World) ResCollect(ctx context.Context, hostnameFilter, kindFilter []string) ([]engine.Res, error) {
|
||||
// XXX: should we be restricted to retrieving resources that were
|
||||
// exported with a tag that allows or restricts our hostname? We could
|
||||
// enforce that here if the underlying API supported it... Add this?
|
||||
return GetResources(obj.EmbdEtcd, hostnameFilter, kindFilter)
|
||||
return resources.GetResources(ctx, obj.Client, hostnameFilter, kindFilter)
|
||||
}
|
||||
|
||||
// IdealClusterSizeWatch returns a stream of errors anytime the cluster-wide
|
||||
// dynamic cluster size setpoint changes.
|
||||
func (obj *World) IdealClusterSizeWatch(ctx context.Context) (chan error, error) {
|
||||
c := client.NewClientFromSimple(obj.Client, ChooserPath)
|
||||
if err := c.Init(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
util.WgFromCtx(ctx).Add(1)
|
||||
go func() {
|
||||
util.WgFromCtx(ctx).Done()
|
||||
// This must get closed *after* because it will not finish until
|
||||
// the Watcher returns, because it contains a wg.Wait() in it...
|
||||
defer c.Close() // ignore error
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
}
|
||||
}()
|
||||
return c.Watcher(ctx, chooser.IdealDynamicSizePath)
|
||||
}
|
||||
|
||||
// IdealClusterSizeGet gets the cluster-wide dynamic cluster size setpoint.
|
||||
func (obj *World) IdealClusterSizeGet(ctx context.Context) (uint16, error) {
|
||||
c := client.NewClientFromSimple(obj.Client, ChooserPath)
|
||||
if err := c.Init(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer c.Close() // ignore error
|
||||
return chooser.DynamicSizeGet(ctx, c) // use client with added namespace
|
||||
}
|
||||
|
||||
// IdealClusterSizeSet sets the cluster-wide dynamic cluster size setpoint.
|
||||
func (obj *World) IdealClusterSizeSet(ctx context.Context, size uint16) (bool, error) {
|
||||
c := client.NewClientFromSimple(obj.Client, ChooserPath)
|
||||
if err := c.Init(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer c.Close() // ignore error
|
||||
return chooser.DynamicSizeSet(ctx, c, size)
|
||||
}
|
||||
|
||||
// StrWatch returns a channel which spits out events on possible string changes.
|
||||
func (obj *World) StrWatch(namespace string) chan error {
|
||||
return WatchStr(obj.EmbdEtcd, namespace)
|
||||
func (obj *World) StrWatch(ctx context.Context, namespace string) (chan error, error) {
|
||||
return str.WatchStr(ctx, obj.Client, namespace)
|
||||
}
|
||||
|
||||
// StrIsNotExist returns whether the error from StrGet is a key missing error.
|
||||
func (obj *World) StrIsNotExist(err error) bool {
|
||||
return err == ErrNotExist
|
||||
return err == interfaces.ErrNotExist
|
||||
}
|
||||
|
||||
// StrGet returns the value for the given namespace.
|
||||
func (obj *World) StrGet(namespace string) (string, error) {
|
||||
return GetStr(obj.EmbdEtcd, namespace)
|
||||
func (obj *World) StrGet(ctx context.Context, namespace string) (string, error) {
|
||||
return str.GetStr(ctx, obj.Client, namespace)
|
||||
}
|
||||
|
||||
// StrSet sets the namespace value to a particular string.
|
||||
func (obj *World) StrSet(namespace, value string) error {
|
||||
return SetStr(obj.EmbdEtcd, namespace, &value)
|
||||
func (obj *World) StrSet(ctx context.Context, namespace, value string) error {
|
||||
return str.SetStr(ctx, obj.Client, namespace, &value)
|
||||
}
|
||||
|
||||
// StrDel deletes the value in a particular namespace.
|
||||
func (obj *World) StrDel(namespace string) error {
|
||||
return SetStr(obj.EmbdEtcd, namespace, nil)
|
||||
func (obj *World) StrDel(ctx context.Context, namespace string) error {
|
||||
return str.SetStr(ctx, obj.Client, namespace, nil)
|
||||
}
|
||||
|
||||
// StrMapWatch returns a channel which spits out events on possible string changes.
|
||||
func (obj *World) StrMapWatch(namespace string) chan error {
|
||||
return WatchStrMap(obj.EmbdEtcd, namespace)
|
||||
func (obj *World) StrMapWatch(ctx context.Context, namespace string) (chan error, error) {
|
||||
return strmap.WatchStrMap(ctx, obj.Client, namespace)
|
||||
}
|
||||
|
||||
// StrMapGet returns a map of hostnames to values in the given namespace.
|
||||
func (obj *World) StrMapGet(namespace string) (map[string]string, error) {
|
||||
return GetStrMap(obj.EmbdEtcd, []string{}, namespace)
|
||||
func (obj *World) StrMapGet(ctx context.Context, namespace string) (map[string]string, error) {
|
||||
return strmap.GetStrMap(ctx, obj.Client, []string{}, namespace)
|
||||
}
|
||||
|
||||
// StrMapSet sets the namespace value to a particular string under the identity
|
||||
// of its own hostname.
|
||||
func (obj *World) StrMapSet(namespace, value string) error {
|
||||
return SetStrMap(obj.EmbdEtcd, obj.Hostname, namespace, &value)
|
||||
func (obj *World) StrMapSet(ctx context.Context, namespace, value string) error {
|
||||
return strmap.SetStrMap(ctx, obj.Client, obj.Hostname, namespace, &value)
|
||||
}
|
||||
|
||||
// StrMapDel deletes the value in a particular namespace.
|
||||
func (obj *World) StrMapDel(namespace string) error {
|
||||
return SetStrMap(obj.EmbdEtcd, obj.Hostname, namespace, nil)
|
||||
func (obj *World) StrMapDel(ctx context.Context, namespace string) error {
|
||||
return strmap.SetStrMap(ctx, obj.Client, obj.Hostname, namespace, nil)
|
||||
}
|
||||
|
||||
// Scheduler returns a scheduling result of hosts in a particular namespace.
|
||||
// XXX: Add a context.Context here
|
||||
func (obj *World) Scheduler(namespace string, opts ...scheduler.Option) (*scheduler.Result, error) {
|
||||
modifiedOpts := []scheduler.Option{}
|
||||
for _, o := range opts {
|
||||
@@ -115,7 +164,8 @@ func (obj *World) Scheduler(namespace string, opts ...scheduler.Option) (*schedu
|
||||
modifiedOpts = append(modifiedOpts, scheduler.Debug(obj.Debug))
|
||||
modifiedOpts = append(modifiedOpts, scheduler.Logf(obj.Logf))
|
||||
|
||||
return scheduler.Schedule(obj.EmbdEtcd.GetClient(), fmt.Sprintf("%s/scheduler/%s", NS, namespace), obj.Hostname, modifiedOpts...)
|
||||
path := fmt.Sprintf(schedulerPathFmt, namespace)
|
||||
return scheduler.Schedule(obj.Client.GetClient(), path, obj.Hostname, modifiedOpts...)
|
||||
}
|
||||
|
||||
// Fs returns a distributed file system from a unique URI. For single host
|
||||
@@ -144,9 +194,14 @@ func (obj *World) Fs(uri string) (engine.Fs, error) {
|
||||
}
|
||||
|
||||
etcdFs := &etcdfs.Fs{
|
||||
Client: obj.EmbdEtcd.GetClient(),
|
||||
Client: obj.Client, // TODO: do we need to add a namespace?
|
||||
Metadata: u.Path,
|
||||
DataPrefix: obj.StoragePrefix,
|
||||
|
||||
Debug: obj.Debug,
|
||||
Logf: func(format string, v ...interface{}) {
|
||||
obj.Logf("fs: "+format, v...)
|
||||
},
|
||||
}
|
||||
return etcdFs, nil
|
||||
}
|
||||
|
||||
4
examples/lang/etcd-config0.mcl
Normal file
4
examples/lang/etcd-config0.mcl
Normal file
@@ -0,0 +1,4 @@
|
||||
# sets a cluster parameter, safe to be called identically from multiple machines
|
||||
config:etcd "whatever" {
|
||||
idealclustersize => 7,
|
||||
}
|
||||
@@ -1,9 +1,10 @@
|
||||
# run this example with these commands
|
||||
# watch -n 0.1 'tail *' # run this in /tmp/mgmt/
|
||||
# time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
|
||||
# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
|
||||
# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
|
||||
# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
|
||||
# time ./mgmt run --hostname h1 --tmp-prefix --no-pgp empty
|
||||
# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty
|
||||
# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty
|
||||
# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty
|
||||
# time ./mgmt deploy --no-git --seeds http://127.0.0.1:2379 lang --lang examples/lang/exchange0.mcl
|
||||
|
||||
import "sys"
|
||||
import "world"
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
package coreworld
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/purpleidea/mgmt/lang/funcs"
|
||||
@@ -75,6 +76,8 @@ func (obj *ExchangeFunc) Init(init *interfaces.Init) error {
|
||||
// Stream returns the changing values that this func has over time.
|
||||
func (obj *ExchangeFunc) Stream() error {
|
||||
defer close(obj.init.Output) // the sender closes
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
for {
|
||||
select {
|
||||
// TODO: should this first chan be run as a priority channel to
|
||||
@@ -106,7 +109,12 @@ func (obj *ExchangeFunc) Stream() error {
|
||||
// TODO: possibly removing our stored value there first!
|
||||
if obj.namespace == "" {
|
||||
obj.namespace = namespace // store it
|
||||
obj.watchChan = obj.init.World.StrMapWatch(obj.namespace) // watch for var changes
|
||||
var err error
|
||||
obj.watchChan, err = obj.init.World.StrMapWatch(ctx, obj.namespace) // watch for var changes
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
} else if obj.namespace != namespace {
|
||||
return fmt.Errorf("can't change namespace, previously: `%s`", obj.namespace)
|
||||
}
|
||||
@@ -116,7 +124,7 @@ func (obj *ExchangeFunc) Stream() error {
|
||||
obj.init.Logf("value: %+v", value)
|
||||
}
|
||||
|
||||
if err := obj.init.World.StrMapSet(obj.namespace, value); err != nil {
|
||||
if err := obj.init.World.StrMapSet(ctx, obj.namespace, value); err != nil {
|
||||
return errwrap.Wrapf(err, "namespace write error of `%s` to `%s`", value, obj.namespace)
|
||||
}
|
||||
|
||||
@@ -134,7 +142,7 @@ func (obj *ExchangeFunc) Stream() error {
|
||||
return errwrap.Wrapf(err, "channel watch failed on `%s`", obj.namespace)
|
||||
}
|
||||
|
||||
keyMap, err := obj.init.World.StrMapGet(obj.namespace)
|
||||
keyMap, err := obj.init.World.StrMapGet(ctx, obj.namespace)
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "channel read failed on `%s`", obj.namespace)
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
package coreworld
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/purpleidea/mgmt/lang/funcs"
|
||||
@@ -73,6 +74,8 @@ func (obj *KVLookupFunc) Init(init *interfaces.Init) error {
|
||||
// Stream returns the changing values that this func has over time.
|
||||
func (obj *KVLookupFunc) Stream() error {
|
||||
defer close(obj.init.Output) // the sender closes
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
for {
|
||||
select {
|
||||
// TODO: should this first chan be run as a priority channel to
|
||||
@@ -104,9 +107,13 @@ func (obj *KVLookupFunc) Stream() error {
|
||||
// TODO: possibly removing our stored value there first!
|
||||
if obj.namespace == "" {
|
||||
obj.namespace = namespace // store it
|
||||
obj.watchChan = obj.init.World.StrMapWatch(obj.namespace) // watch for var changes
|
||||
var err error
|
||||
obj.watchChan, err = obj.init.World.StrMapWatch(ctx, obj.namespace) // watch for var changes
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
result, err := obj.buildMap() // build the map...
|
||||
result, err := obj.buildMap(ctx) // build the map...
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -135,7 +142,7 @@ func (obj *KVLookupFunc) Stream() error {
|
||||
return errwrap.Wrapf(err, "channel watch failed on `%s`", obj.namespace)
|
||||
}
|
||||
|
||||
result, err := obj.buildMap() // build the map...
|
||||
result, err := obj.buildMap(ctx) // build the map...
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -166,8 +173,8 @@ func (obj *KVLookupFunc) Close() error {
|
||||
}
|
||||
|
||||
// buildMap builds the result map which we'll need. It uses struct variables.
|
||||
func (obj *KVLookupFunc) buildMap() (types.Value, error) {
|
||||
keyMap, err := obj.init.World.StrMapGet(obj.namespace)
|
||||
func (obj *KVLookupFunc) buildMap(ctx context.Context) (types.Value, error) {
|
||||
keyMap, err := obj.init.World.StrMapGet(ctx, obj.namespace)
|
||||
if err != nil {
|
||||
return nil, errwrap.Wrapf(err, "channel read failed on `%s`", obj.namespace)
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// test with:
|
||||
// time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl
|
||||
// time ./mgmt run --hostname h1 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl
|
||||
// time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl
|
||||
// time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/schedule0.mcl
|
||||
// kill h2 (should see h1 and h3 pick [h1, h3] instead)
|
||||
|
||||
@@ -18,11 +18,13 @@
|
||||
package lib
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/purpleidea/mgmt/etcd"
|
||||
"github.com/purpleidea/mgmt/etcd/client"
|
||||
"github.com/purpleidea/mgmt/etcd/deployer"
|
||||
etcdfs "github.com/purpleidea/mgmt/etcd/fs"
|
||||
"github.com/purpleidea/mgmt/gapi"
|
||||
"github.com/purpleidea/mgmt/util/errwrap"
|
||||
@@ -34,12 +36,13 @@ import (
|
||||
|
||||
const (
|
||||
// MetadataPrefix is the etcd prefix where all our fs superblocks live.
|
||||
MetadataPrefix = etcd.NS + "/fs"
|
||||
MetadataPrefix = "/fs"
|
||||
// StoragePrefix is the etcd prefix where all our fs data lives.
|
||||
StoragePrefix = etcd.NS + "/storage"
|
||||
StoragePrefix = "/storage"
|
||||
)
|
||||
|
||||
// deploy is the cli target to manage deploys to our cluster.
|
||||
// TODO: add a timeout and/or cancel signal to replace context.TODO()
|
||||
func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
|
||||
cliContext := c.Parent()
|
||||
if cliContext == nil {
|
||||
@@ -55,7 +58,12 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
|
||||
debug = flags.Debug
|
||||
}
|
||||
}
|
||||
Logf := func(format string, v ...interface{}) {
|
||||
log.Printf("deploy: "+format, v...)
|
||||
}
|
||||
|
||||
hello(program, version, flags) // say hello!
|
||||
defer Logf("goodbye!")
|
||||
|
||||
var hash, pHash string
|
||||
if !cliContext.Bool("no-git") {
|
||||
@@ -74,7 +82,7 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
|
||||
}
|
||||
|
||||
hash = head.Hash().String() // current commit id
|
||||
log.Printf("deploy: hash: %s", hash)
|
||||
Logf("hash: %s", hash)
|
||||
|
||||
lo := &git.LogOptions{
|
||||
From: head.Hash(),
|
||||
@@ -90,7 +98,7 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
|
||||
if err == nil { // errors are okay, we might be empty
|
||||
pHash = commit.Hash.String() // previous commit id
|
||||
}
|
||||
log.Printf("deploy: previous deploy hash: %s", pHash)
|
||||
Logf("previous deploy hash: %s", pHash)
|
||||
if cliContext.Bool("force") {
|
||||
pHash = "" // don't check this :(
|
||||
}
|
||||
@@ -101,28 +109,58 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
|
||||
|
||||
uniqueid := uuid.New() // panic's if it can't generate one :P
|
||||
|
||||
etcdClient := &etcd.ClientEtcd{
|
||||
Seeds: cliContext.StringSlice("seeds"), // endpoints
|
||||
etcdClient := client.NewClientFromSeedsNamespace(
|
||||
cliContext.StringSlice("seeds"), // endpoints
|
||||
NS,
|
||||
)
|
||||
if err := etcdClient.Init(); err != nil {
|
||||
return errwrap.Wrapf(err, "client Init failed")
|
||||
}
|
||||
if err := etcdClient.Connect(); err != nil {
|
||||
return errwrap.Wrapf(err, "client connection error")
|
||||
defer func() {
|
||||
err := errwrap.Wrapf(etcdClient.Close(), "client Close failed")
|
||||
if err != nil {
|
||||
// TODO: cause the final exit code to be non-zero
|
||||
Logf("client cleanup error: %+v", err)
|
||||
}
|
||||
defer etcdClient.Destroy()
|
||||
}()
|
||||
|
||||
simpleDeploy := &deployer.SimpleDeploy{
|
||||
Client: etcdClient,
|
||||
Debug: debug,
|
||||
Logf: func(format string, v ...interface{}) {
|
||||
Logf("deploy: "+format, v...)
|
||||
},
|
||||
}
|
||||
if err := simpleDeploy.Init(); err != nil {
|
||||
return errwrap.Wrapf(err, "deploy Init failed")
|
||||
}
|
||||
defer func() {
|
||||
err := errwrap.Wrapf(simpleDeploy.Close(), "deploy Close failed")
|
||||
if err != nil {
|
||||
// TODO: cause the final exit code to be non-zero
|
||||
Logf("deploy cleanup error: %+v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// get max id (from all the previous deploys)
|
||||
max, err := etcd.GetMaxDeployID(etcdClient)
|
||||
max, err := simpleDeploy.GetMaxDeployID(context.TODO())
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "error getting max deploy id")
|
||||
}
|
||||
// find the latest id
|
||||
var id = max + 1 // next id
|
||||
log.Printf("deploy: max deploy id: %d", max)
|
||||
Logf("previous max deploy id: %d", max)
|
||||
|
||||
etcdFs := &etcdfs.Fs{
|
||||
Client: etcdClient.GetClient(),
|
||||
Client: etcdClient,
|
||||
// TODO: using a uuid is meant as a temporary measure, i hate them
|
||||
Metadata: MetadataPrefix + fmt.Sprintf("/deploy/%d-%s", id, uniqueid),
|
||||
DataPrefix: StoragePrefix,
|
||||
|
||||
Debug: debug,
|
||||
Logf: func(format string, v ...interface{}) {
|
||||
Logf("fs: "+format, v...)
|
||||
},
|
||||
}
|
||||
|
||||
cliInfo := &gapi.CliInfo{
|
||||
@@ -154,9 +192,9 @@ func deploy(c *cli.Context, name string, gapiObj gapi.GAPI) error {
|
||||
}
|
||||
|
||||
// this nominally checks the previous git hash matches our expectation
|
||||
if err := etcd.AddDeploy(etcdClient, id, hash, pHash, &str); err != nil {
|
||||
if err := simpleDeploy.AddDeploy(context.TODO(), id, hash, pHash, &str); err != nil {
|
||||
return errwrap.Wrapf(err, "could not create deploy id `%d`", id)
|
||||
}
|
||||
log.Printf("deploy: success, id: %d", id)
|
||||
Logf("success, id: %d", id)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ func hello(program, version string, flags Flags) {
|
||||
log.SetFlags(logFlags)
|
||||
|
||||
// un-hijack from capnslog...
|
||||
// XXX: move this to the etcd package when new version deprecates capnslog
|
||||
log.SetOutput(os.Stderr)
|
||||
if flags.Verbose {
|
||||
capnslog.SetFormatter(capnslog.NewLogFormatter(os.Stderr, "(etcd) ", logFlags))
|
||||
|
||||
227
lib/main.go
227
lib/main.go
@@ -18,6 +18,7 @@
|
||||
package lib
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
@@ -33,6 +34,8 @@ import (
|
||||
"github.com/purpleidea/mgmt/engine/graph/autogroup"
|
||||
_ "github.com/purpleidea/mgmt/engine/resources" // let register's run
|
||||
"github.com/purpleidea/mgmt/etcd"
|
||||
"github.com/purpleidea/mgmt/etcd/chooser"
|
||||
"github.com/purpleidea/mgmt/etcd/deployer"
|
||||
"github.com/purpleidea/mgmt/gapi"
|
||||
"github.com/purpleidea/mgmt/gapi/empty"
|
||||
"github.com/purpleidea/mgmt/pgp"
|
||||
@@ -44,10 +47,14 @@ import (
|
||||
etcdtypes "github.com/coreos/etcd/pkg/types"
|
||||
)
|
||||
|
||||
const (
|
||||
// NS is the root namespace for etcd operations. All keys must use it!
|
||||
NS = "/_mgmt" // must not end with a slash!
|
||||
)
|
||||
|
||||
// Flags are some constant flags which are used throughout the program.
|
||||
type Flags struct {
|
||||
Debug bool // add additional log messages
|
||||
Trace bool // add execution flow log messages
|
||||
Verbose bool // add extra log message output
|
||||
}
|
||||
|
||||
@@ -105,6 +112,7 @@ type Main struct {
|
||||
Prometheus bool // enable prometheus metrics
|
||||
PrometheusListen string // prometheus instance bind specification
|
||||
|
||||
embdEtcd *etcd.EmbdEtcd // TODO: can be an interface in the future...
|
||||
ge *graph.Engine
|
||||
|
||||
exit *util.EasyExit // exit signal
|
||||
@@ -140,7 +148,7 @@ func (obj *Main) Init() error {
|
||||
|
||||
obj.idealClusterSize = uint16(obj.IdealClusterSize)
|
||||
if obj.IdealClusterSize < 0 { // value is undefined, set to the default
|
||||
obj.idealClusterSize = etcd.DefaultIdealClusterSize
|
||||
obj.idealClusterSize = chooser.DefaultIdealDynamicSize
|
||||
}
|
||||
|
||||
if obj.idealClusterSize < 1 {
|
||||
@@ -194,6 +202,7 @@ func (obj *Main) Run() error {
|
||||
hello(obj.Program, obj.Version, obj.Flags) // say hello!
|
||||
defer Logf("goodbye!")
|
||||
|
||||
exitCtx := obj.exit.Context() // local exit signal
|
||||
defer obj.exit.Done(nil) // ensure this gets called even if Exit doesn't
|
||||
|
||||
hostname, err := os.Hostname() // a sensible default
|
||||
@@ -243,13 +252,14 @@ func (obj *Main) Run() error {
|
||||
if err := prom.InitKindMetrics(engine.RegisteredResourcesNames()); err != nil {
|
||||
return errwrap.Wrapf(err, "can't initialize kind-specific prometheus metrics")
|
||||
}
|
||||
obj.cleanup = append(obj.cleanup, func() error {
|
||||
defer func() {
|
||||
Logf("prometheus: stopping instance")
|
||||
if err := prom.Stop(); err != nil {
|
||||
return errwrap.Wrapf(err, "the prometheus instance exited poorly")
|
||||
err := errwrap.Wrapf(prom.Stop(), "the prometheus instance exited poorly")
|
||||
if err != nil {
|
||||
// TODO: cause the final exit code to be non-zero
|
||||
Logf("cleanup error: %+v", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
if !obj.NoPgp {
|
||||
@@ -296,6 +306,8 @@ func (obj *Main) Run() error {
|
||||
|
||||
exitchan := make(chan struct{}) // exit on close
|
||||
wg := &sync.WaitGroup{} // waitgroup for inner loop & goroutines
|
||||
defer wg.Wait() // wait in case we have an early exit
|
||||
defer obj.exit.Done(nil) // trigger exit in case something blocks
|
||||
|
||||
// exit after `max-runtime` seconds for no reason at all...
|
||||
if i := obj.MaxRuntime; i > 0 {
|
||||
@@ -335,63 +347,108 @@ func (obj *Main) Run() error {
|
||||
// XXX: should this be moved to later in the code?
|
||||
go converger.Run(true) // main loop for converger, true to start paused
|
||||
converger.Ready() // block until ready
|
||||
obj.cleanup = append(obj.cleanup, func() error {
|
||||
defer func() {
|
||||
// TODO: shutdown converger, but make sure that using it in a
|
||||
// still running embdEtcd struct doesn't block waiting on it...
|
||||
converger.Shutdown()
|
||||
return nil
|
||||
})
|
||||
}()
|
||||
|
||||
// embedded etcd
|
||||
if len(obj.seeds) == 0 {
|
||||
Logf("etcd: seeds: no seeds specified!")
|
||||
Logf("no seeds specified!")
|
||||
} else {
|
||||
Logf("etcd: seeds(%d): %+v", len(obj.seeds), obj.seeds)
|
||||
Logf("seeds(%d): %+v", len(obj.seeds), obj.seeds)
|
||||
}
|
||||
embdEtcd := etcd.NewEmbdEtcd(
|
||||
hostname,
|
||||
obj.seeds,
|
||||
obj.clientURLs,
|
||||
obj.serverURLs,
|
||||
obj.advertiseClientURLs,
|
||||
obj.advertiseServerURLs,
|
||||
obj.NoServer,
|
||||
obj.NoNetwork,
|
||||
obj.idealClusterSize,
|
||||
etcd.Flags{
|
||||
Debug: obj.Flags.Debug,
|
||||
Trace: obj.Flags.Trace,
|
||||
Verbose: obj.Flags.Verbose,
|
||||
},
|
||||
prefix,
|
||||
converger,
|
||||
)
|
||||
if embdEtcd == nil {
|
||||
return fmt.Errorf("etcd: creation failed")
|
||||
} else if err := embdEtcd.Startup(); err != nil { // startup (returns when etcd main loop is running)
|
||||
return errwrap.Wrapf(err, "etcd: startup failed")
|
||||
}
|
||||
obj.cleanup = append(obj.cleanup, func() error {
|
||||
// cleanup etcd main loop last so it can process everything first
|
||||
err := embdEtcd.Destroy() // shutdown and cleanup etcd
|
||||
return errwrap.Wrapf(err, "etcd: exited poorly")
|
||||
})
|
||||
obj.embdEtcd = &etcd.EmbdEtcd{
|
||||
Hostname: hostname,
|
||||
Seeds: obj.seeds,
|
||||
|
||||
// wait for etcd server to be ready before continuing...
|
||||
// XXX: this is wrong if we're not going to be a server! we'll block!!!
|
||||
// select {
|
||||
// case <-embdEtcd.ServerReady():
|
||||
// Logf("etcd: server: ready!")
|
||||
// // pass
|
||||
// case <-time.After(((etcd.MaxStartServerTimeout * etcd.MaxStartServerRetries) + 1) * time.Second):
|
||||
// return fmt.Errorf("etcd: startup timeout")
|
||||
// }
|
||||
time.Sleep(1 * time.Second) // XXX: temporary workaround
|
||||
ClientURLs: obj.clientURLs,
|
||||
ServerURLs: obj.serverURLs,
|
||||
AClientURLs: obj.advertiseClientURLs,
|
||||
AServerURLs: obj.advertiseServerURLs,
|
||||
|
||||
NoServer: obj.NoServer,
|
||||
NoNetwork: obj.NoNetwork,
|
||||
|
||||
Chooser: &chooser.DynamicSize{
|
||||
IdealClusterSize: obj.idealClusterSize,
|
||||
},
|
||||
|
||||
Converger: converger,
|
||||
|
||||
NS: NS, // namespace
|
||||
Prefix: fmt.Sprintf("%s/", path.Join(prefix, "etcd")),
|
||||
|
||||
Debug: obj.Flags.Debug,
|
||||
Logf: func(format string, v ...interface{}) {
|
||||
log.Printf("etcd: "+format, v...)
|
||||
},
|
||||
}
|
||||
if err := obj.embdEtcd.Init(); err != nil {
|
||||
return errwrap.Wrapf(err, "etcd init failed")
|
||||
}
|
||||
defer func() {
|
||||
// cleanup etcd main loop last so it can process everything first
|
||||
err := errwrap.Wrapf(obj.embdEtcd.Close(), "etcd close failed")
|
||||
if err != nil {
|
||||
// TODO: cause the final exit code to be non-zero
|
||||
Logf("cleanup error: %+v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
var etcdErr error
|
||||
// don't add a wait group here, this is done in embdEtcd.Destroy()
|
||||
go func() {
|
||||
etcdErr = obj.embdEtcd.Run() // returns when it shuts down...
|
||||
obj.exit.Done(errwrap.Wrapf(etcdErr, "etcd run failed")) // trigger exit
|
||||
}()
|
||||
// tell etcd to shutdown, blocks until done!
|
||||
// TODO: handle/report error?
|
||||
defer obj.embdEtcd.Destroy()
|
||||
|
||||
// wait for etcd to be ready before continuing...
|
||||
// TODO: do we need to add a timeout here?
|
||||
select {
|
||||
case <-obj.embdEtcd.Ready():
|
||||
Logf("etcd is ready!")
|
||||
// pass
|
||||
|
||||
case <-obj.embdEtcd.Exited():
|
||||
Logf("etcd was destroyed!")
|
||||
err := fmt.Errorf("etcd was destroyed on startup")
|
||||
if etcdErr != nil {
|
||||
err = etcdErr
|
||||
}
|
||||
return err
|
||||
}
|
||||
// TODO: should getting a client from EmbdEtcd already come with the NS?
|
||||
etcdClient, err := obj.embdEtcd.MakeClientFromNamespace(NS)
|
||||
if err != nil {
|
||||
return errwrap.Wrapf(err, "make Client failed")
|
||||
}
|
||||
simpleDeploy := &deployer.SimpleDeploy{
|
||||
Client: etcdClient,
|
||||
Debug: obj.Flags.Debug,
|
||||
Logf: func(format string, v ...interface{}) {
|
||||
log.Printf("deploy: "+format, v...)
|
||||
},
|
||||
}
|
||||
if err := simpleDeploy.Init(); err != nil {
|
||||
return errwrap.Wrapf(err, "deploy Init failed")
|
||||
}
|
||||
defer func() {
|
||||
err := errwrap.Wrapf(simpleDeploy.Close(), "deploy Close failed")
|
||||
if err != nil {
|
||||
// TODO: cause the final exit code to be non-zero
|
||||
Logf("cleanup error: %+v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// implementation of the World API (alternatives can be substituted in)
|
||||
world := &etcd.World{
|
||||
Hostname: hostname,
|
||||
EmbdEtcd: embdEtcd,
|
||||
Client: etcdClient,
|
||||
MetadataPrefix: MetadataPrefix,
|
||||
StoragePrefix: StoragePrefix,
|
||||
StandaloneFs: obj.DeployFs, // used for static deploys
|
||||
@@ -415,9 +472,16 @@ func (obj *Main) Run() error {
|
||||
}
|
||||
|
||||
if err := obj.ge.Init(); err != nil {
|
||||
return errwrap.Wrapf(err, "engine: creation failed")
|
||||
return errwrap.Wrapf(err, "engine Init failed")
|
||||
}
|
||||
// After this point, the inner "main loop" must run, so that the engine
|
||||
defer func() {
|
||||
err := errwrap.Wrapf(obj.ge.Close(), "engine Close failed")
|
||||
if err != nil {
|
||||
// TODO: cause the final exit code to be non-zero
|
||||
Logf("cleanup error: %+v", err)
|
||||
}
|
||||
}()
|
||||
// After this point, the inner "main loop" will run, so that the engine
|
||||
// can get closed with the deploy close via the deploy chan shutdown...
|
||||
|
||||
// main loop logic starts here
|
||||
@@ -456,7 +520,7 @@ func (obj *Main) Run() error {
|
||||
obj.ge.Pause(false)
|
||||
}
|
||||
// must be paused before this is run
|
||||
obj.ge.Close()
|
||||
//obj.ge.Close() // run in defer instead
|
||||
|
||||
return // this is the only place we exit
|
||||
}
|
||||
@@ -678,9 +742,10 @@ func (obj *Main) Run() error {
|
||||
|
||||
// get max id (from all the previous deploys)
|
||||
// this is what the existing cluster is already running
|
||||
// TODO: can this block since we didn't deploy yet?
|
||||
max, err := etcd.GetMaxDeployID(embdEtcd)
|
||||
// TODO: add a timeout to context?
|
||||
max, err := simpleDeploy.GetMaxDeployID(exitCtx)
|
||||
if err != nil {
|
||||
close(deployChan) // because we won't close it downstream...
|
||||
return errwrap.Wrapf(err, "error getting max deploy id")
|
||||
}
|
||||
|
||||
@@ -710,9 +775,24 @@ func (obj *Main) Run() error {
|
||||
|
||||
// now we can wait for future deploys, but if we already had an
|
||||
// initial deploy from run, don't switch to this unless it's new
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
watchChan, err := simpleDeploy.WatchDeploy(ctx)
|
||||
if err != nil {
|
||||
cancel()
|
||||
Logf("error starting deploy: %+v", err)
|
||||
return
|
||||
}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer cancel() // unblock watch deploy
|
||||
select { // wait until we're ready to shutdown
|
||||
case <-exitchan:
|
||||
}
|
||||
}()
|
||||
canceled := false
|
||||
|
||||
var last uint64
|
||||
startChan := make(chan struct{}) // start signal
|
||||
close(startChan) // kick it off!
|
||||
for {
|
||||
if obj.NoDeployWatch && (obj.Deploy != nil || last > 0) {
|
||||
// block here, because when we close the
|
||||
@@ -725,29 +805,33 @@ func (obj *Main) Run() error {
|
||||
}
|
||||
|
||||
select {
|
||||
case <-startChan: // kick the loop once at start
|
||||
startChan = nil // disable
|
||||
|
||||
case err, ok := <-etcd.WatchDeploy(embdEtcd):
|
||||
// WatchDeploy should send an initial event now...
|
||||
case err, ok := <-watchChan:
|
||||
if !ok {
|
||||
// TODO: is any of this needed in here?
|
||||
if !canceled {
|
||||
obj.exit.Done(nil) // regular shutdown
|
||||
}
|
||||
return
|
||||
}
|
||||
if err == context.Canceled {
|
||||
canceled = true
|
||||
continue // channel close is coming...
|
||||
}
|
||||
if err != nil {
|
||||
// TODO: it broke, can we restart?
|
||||
obj.exit.Done(fmt.Errorf("deploy: watch error"))
|
||||
return
|
||||
obj.exit.Done(errwrap.Wrapf(err, "deploy: watch error"))
|
||||
continue
|
||||
}
|
||||
startChan = nil // disable it early...
|
||||
if obj.Flags.Debug {
|
||||
Logf("deploy: got activity")
|
||||
}
|
||||
|
||||
case <-exitchan:
|
||||
return
|
||||
//case <-exitchan:
|
||||
// return // exit via channel close instead
|
||||
}
|
||||
|
||||
latest, err := etcd.GetMaxDeployID(embdEtcd) // or zero
|
||||
latest, err := simpleDeploy.GetMaxDeployID(ctx) // or zero
|
||||
if err != nil {
|
||||
Logf("error getting max deploy id: %+v", err)
|
||||
continue
|
||||
@@ -774,7 +858,7 @@ func (obj *Main) Run() error {
|
||||
|
||||
// 0 passes through an empty deploy without an error...
|
||||
// (unless there is some sort of etcd error that occurs)
|
||||
str, err := etcd.GetDeploy(embdEtcd, latest)
|
||||
str, err := simpleDeploy.GetDeploy(ctx, latest)
|
||||
if err != nil {
|
||||
Logf("deploy: error getting deploy: %+v", err)
|
||||
continue
|
||||
@@ -871,6 +955,9 @@ func (obj *Main) FastExit(err error) {
|
||||
// might leave some of your resources in a partial or unknown state.
|
||||
func (obj *Main) Interrupt(err error) {
|
||||
// XXX: implement and run Interrupt API for supported resources
|
||||
|
||||
obj.FastExit(err)
|
||||
|
||||
if obj.embdEtcd != nil {
|
||||
obj.embdEtcd.Interrupt() // unblock borked clusters
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,14 +175,19 @@ func run(c *cli.Context, name string, gapiObj gapi.GAPI) error {
|
||||
reterr := obj.Run()
|
||||
if reterr != nil {
|
||||
// log the error message returned
|
||||
log.Printf("main: Error: %v", reterr)
|
||||
if obj.Flags.Debug {
|
||||
log.Printf("main: %+v", reterr)
|
||||
}
|
||||
}
|
||||
|
||||
if err := obj.Close(); err != nil {
|
||||
log.Printf("main: Close: %v", err)
|
||||
if obj.Flags.Debug {
|
||||
log.Printf("main: Close: %+v", err)
|
||||
}
|
||||
if reterr == nil {
|
||||
return err
|
||||
}
|
||||
reterr = errwrap.Append(reterr, err)
|
||||
}
|
||||
|
||||
return reterr
|
||||
|
||||
2
main.go
2
main.go
@@ -27,7 +27,6 @@ import (
|
||||
// These constants are some global variables that are used throughout the code.
|
||||
const (
|
||||
Debug = false // add additional log messages
|
||||
Trace = false // add execution flow log messages
|
||||
Verbose = false // add extra log message output
|
||||
)
|
||||
|
||||
@@ -40,7 +39,6 @@ var (
|
||||
func main() {
|
||||
flags := mgmt.Flags{
|
||||
Debug: Debug,
|
||||
Trace: Trace,
|
||||
Verbose: Verbose,
|
||||
}
|
||||
if err := mgmt.CLI(program, version, flags); err != nil {
|
||||
|
||||
@@ -23,17 +23,25 @@
|
||||
|
||||
import sys
|
||||
|
||||
lines = sys.stdin.readlines()
|
||||
if len(sys.argv) == 2 and sys.argv[1] != "-":
|
||||
lines = open(sys.argv[1], "r").readlines()
|
||||
else:
|
||||
lines = sys.stdin.readlines()
|
||||
|
||||
print("read: %d lines" % len(lines))
|
||||
|
||||
# find program start
|
||||
start = -1
|
||||
for i in range(len(lines)):
|
||||
line = lines[i]
|
||||
if line.startswith("PC="):
|
||||
start=i
|
||||
break
|
||||
|
||||
if start == -1:
|
||||
print("could not find program start, looking for PC=???", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print("starts at line: %d" % (start+1)) # +1 because we're zero based
|
||||
|
||||
def is_chunk(line):
|
||||
@@ -59,6 +67,18 @@ def filter_chunk(chunk):
|
||||
package_line = lines[1]
|
||||
if package_line.startswith("github.com/purpleidea/mgmt/vendor/"):
|
||||
return False
|
||||
if package_line.startswith("github.com/") and not package_line.startswith("github.com/purpleidea/mgmt/"):
|
||||
return False
|
||||
if package_line.startswith("internal/poll"):
|
||||
return False
|
||||
if package_line.startswith("context.propagateCancel"):
|
||||
return False
|
||||
if package_line.startswith("runtime.gopark"):
|
||||
return False
|
||||
if package_line.startswith("runtime.futex"):
|
||||
return False
|
||||
if package_line.startswith("os/signal.signal_recv"):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ if ! command -v etcdctl >/dev/null; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
mkdir /tmp/mgmt/{A..E}
|
||||
#mkdir /tmp/mgmt/{A..E}
|
||||
|
||||
# kill servers on error/exit
|
||||
trap 'pkill -9 mgmt' EXIT
|
||||
@@ -22,7 +22,7 @@ $TIMEOUT "$MGMT" run --hostname h3 --tmp-prefix --no-pgp --seeds http://127.0.0.
|
||||
# wait for everything to converge
|
||||
sleep 30s
|
||||
|
||||
ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/idealClusterSize 3
|
||||
ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 3
|
||||
|
||||
$TIMEOUT "$MGMT" run --hostname h4 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 empty &
|
||||
$TIMEOUT "$MGMT" run --hostname h5 --tmp-prefix --no-pgp --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2387 --server-urls http://127.0.0.1:2388 empty &
|
||||
@@ -32,7 +32,7 @@ sleep 30s
|
||||
|
||||
test "$(ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 member list | wc -l)" -eq 3
|
||||
|
||||
ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2381 put /_mgmt/idealClusterSize 5
|
||||
ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 5
|
||||
|
||||
# wait for everything to converge
|
||||
sleep 30s
|
||||
27
test/shell/etcd-conflicting-server.sh
Executable file
27
test/shell/etcd-conflicting-server.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
# Start one mgmt, then start a second one with the same settings. The second
# one is expected to fail with exit code 1 because a server is already running.
# Afterwards the first one is sent ^C, and its exit status is the test result.

. "$(dirname "$0")/../util.sh"

# run empty graphs, we're just testing etcd clustering
$TIMEOUT "$MGMT" run --no-pgp --tmp-prefix empty &
pid1=$!
sleep 15s # let it startup

# run a second one that should conflict because a server is already running...
$TIMEOUT "$MGMT" run --no-pgp --tmp-prefix empty &
pid2=$!
wait "$pid2"
e=$?
if [ "$e" -eq 0 ]; then
	echo "second mgmt exited successfully when error was expected"
	exit 1
fi
if [ "$e" -ne 1 ]; then
	echo "second mgmt exited with unexpected error of $e"
	exit "$e"
fi

# NOTE: run kill directly; wrapping it in $() would try to execute its output
kill -SIGINT "$pid1" & # send ^C to exit 1st mgmt
wait "$pid1" # get exit status
# if pid1 exits because of a timeout, then it blocked, and this is a bug!
exit $?
|
||||
35
test/shell/etcd-three-hosts-reversed.sh
Executable file
35
test/shell/etcd-three-hosts-reversed.sh
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash -e
# Start three mgmt hosts (h1, h2, h3) one after another, then send ^C to each
# of them in the same order that they were started (h1 first), checking that
# every one of them exits with a zero status.

. "$(dirname "$0")/../util.sh"

# run empty graphs, we're just testing etcd clustering
$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix empty &
pid1=$!
sleep 15s # let it startup

$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix empty &
pid2=$!
sleep 15s

$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix empty &
pid3=$!
sleep 15s

# NOTE: the delayed kills run in a plain background subshell; wrapping them in
# $() would try to execute their output as a command
(sleep 15s && kill -SIGINT "$pid1") & # send ^C to exit 1st mgmt (reversed!)
wait "$pid1"
e=$?
if [ "$e" -ne 0 ]; then
	exit "$e"
fi

(sleep 15s && kill -SIGINT "$pid2") & # send ^C to exit 2nd mgmt
wait "$pid2"
e=$?
if [ "$e" -ne 0 ]; then
	exit "$e"
fi

(sleep 15s && kill -SIGINT "$pid3") & # send ^C to exit 3rd mgmt (reversed!)
wait "$pid3" # get exit status
# if pid3 exits because of a timeout, then it blocked, and this is a bug!
exit $?
|
||||
24
test/shell/etcd-two-hosts-reversed.sh
Executable file
24
test/shell/etcd-two-hosts-reversed.sh
Executable file
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash -e
# Start two mgmt hosts (h1, then h2), then send ^C to h1 first and h2 second,
# checking that both of them exit with a zero status.

. "$(dirname "$0")/../util.sh"

# run empty graphs, we're just testing etcd clustering
$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix empty &
pid1=$!
sleep 15s # let it startup

$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix empty &
pid2=$!
sleep 15s

# NOTE: the delayed kills run in a plain background subshell; wrapping them in
# $() would try to execute their output as a command
(sleep 15s && kill -SIGINT "$pid1") & # send ^C to exit 1st mgmt! (reverse!)
wait "$pid1"
e=$?
if [ "$e" -ne 0 ]; then
	exit "$e"
fi

(sleep 15s && kill -SIGINT "$pid2") & # send ^C to exit 2nd mgmt (reverse!)
wait "$pid2" # get exit status
# if pid2 exits because of a timeout, then it blocked, and this is a bug!
exit $?
|
||||
@@ -5,18 +5,58 @@
|
||||
set -o errexit
|
||||
set -o pipefail
|
||||
|
||||
$TIMEOUT "$MGMT" run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang exchange0.mcl &
|
||||
$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang exchange0.mcl &
|
||||
$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang exchange0.mcl &
|
||||
$TIMEOUT "$MGMT" run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang exchange0.mcl &
|
||||
$TIMEOUT "$MGMT" run --hostname h1 --tmp-prefix --no-pgp empty &
|
||||
pid1=$!
|
||||
sleep 10s
|
||||
$TIMEOUT "$MGMT" run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty &
|
||||
pid2=$!
|
||||
sleep 10s
|
||||
$TIMEOUT "$MGMT" run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty &
|
||||
pid3=$!
|
||||
sleep 10s
|
||||
$TIMEOUT "$MGMT" run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty &
|
||||
pid4=$!
|
||||
sleep 10s
|
||||
$TIMEOUT "$MGMT" deploy --no-git --seeds http://127.0.0.1:2379 lang --lang exchange0.mcl
|
||||
|
||||
# kill servers on error/exit
|
||||
trap 'pkill -9 mgmt' EXIT
|
||||
#trap 'pkill -9 mgmt' EXIT
|
||||
|
||||
# wait for everything to converge
|
||||
sleep 10s
|
||||
sleep 15s
|
||||
|
||||
# debug
|
||||
tail /tmp/mgmt/exchange-*
|
||||
|
||||
test "$(cat /tmp/mgmt/exchange-* | grep -c h1)" -eq 4
|
||||
test "$(cat /tmp/mgmt/exchange-* | grep -c h2)" -eq 4
|
||||
test "$(cat /tmp/mgmt/exchange-* | grep -c h3)" -eq 4
|
||||
test "$(cat /tmp/mgmt/exchange-* | grep -c h4)" -eq 4
|
||||
|
||||
$(sleep 15s && kill -SIGINT $pid4)& # send ^C to exit mgmt...
|
||||
wait $pid4
|
||||
e=$?
|
||||
if [ $e -ne 0 ]; then
|
||||
exit $e
|
||||
fi
|
||||
|
||||
$(sleep 15s && kill -SIGINT $pid3)& # send ^C to exit mgmt...
|
||||
wait $pid3
|
||||
e=$?
|
||||
if [ $e -ne 0 ]; then
|
||||
exit $e
|
||||
fi
|
||||
|
||||
$(sleep 15s && kill -SIGINT $pid2)& # send ^C to exit mgmt...
|
||||
wait $pid2
|
||||
e=$?
|
||||
if [ $e -ne 0 ]; then
|
||||
exit $e
|
||||
fi
|
||||
|
||||
$(sleep 15s && kill -SIGINT $pid1)& # send ^C to exit mgmt...
|
||||
wait $pid1
|
||||
e=$?
|
||||
if [ $e -ne 0 ]; then
|
||||
exit $e
|
||||
fi
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
# run this example with these commands
|
||||
# watch -n 0.1 'tail *' # run this in /tmp/mgmt/
|
||||
# time ./mgmt run --hostname h1 --ideal-cluster-size 1 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
|
||||
# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
|
||||
# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
|
||||
# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp lang --lang examples/lang/exchange0.mcl
|
||||
# time ./mgmt run --hostname h1 --tmp-prefix --no-pgp empty
|
||||
# time ./mgmt run --hostname h2 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2381 --server-urls http://127.0.0.1:2382 --tmp-prefix --no-pgp empty
|
||||
# time ./mgmt run --hostname h3 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2383 --server-urls http://127.0.0.1:2384 --tmp-prefix --no-pgp empty
|
||||
# time ./mgmt run --hostname h4 --seeds http://127.0.0.1:2379 --client-urls http://127.0.0.1:2385 --server-urls http://127.0.0.1:2386 --tmp-prefix --no-pgp empty
|
||||
# time ./mgmt deploy --no-git --seeds http://127.0.0.1:2379 lang --lang examples/lang/exchange0.mcl
|
||||
|
||||
import "sys"
|
||||
import "world"
|
||||
|
||||
@@ -65,6 +65,9 @@ function consistent-imports() {
|
||||
if grep $'\t"github.com/purpleidea/mgmt/engine/util"' "$1"; then # import as engineUtil
|
||||
return 1
|
||||
fi
|
||||
if grep '"golang.org/x/net/context"' "$1"; then # use built-in context
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# run go vet on a per-package basis
|
||||
|
||||
@@ -86,7 +86,7 @@ func TestEasyAckOnce2(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleSubscribeSync() {
|
||||
func ExampleSubscribedSignal() {
|
||||
fmt.Println("hello")
|
||||
|
||||
x := &SubscribedSignal{}
|
||||
|
||||
35
util/util.go
35
util/util.go
@@ -430,6 +430,21 @@ func TimeAfterOrBlockCtx(ctx context.Context, t int) <-chan struct{} {
|
||||
return ch
|
||||
}
|
||||
|
||||
// CloseAfter takes a duration, similarly to `time.After`, and returns a channel
// that closes when either the context is done, or the duration expires. The
// internal timer is stopped as soon as the channel is closed, so a context that
// finishes early does not leave a timer pending for the rest of the duration.
func CloseAfter(ctx context.Context, d time.Duration) <-chan struct{} {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		timer := time.NewTimer(d)
		defer timer.Stop() // release the timer if the context finished first
		select {
		case <-timer.C:
			// done
		case <-ctx.Done():
		}
	}()
	return ch
}
|
||||
|
||||
// SystemBusPrivateUsable makes using the private bus usable.
|
||||
// TODO: should be upstream: https://github.com/godbus/dbus/issues/15
|
||||
func SystemBusPrivateUsable() (conn *dbus.Conn, err error) {
|
||||
@@ -468,6 +483,26 @@ func SessionBusPrivateUsable() (conn *dbus.Conn, err error) {
|
||||
return conn, nil // success
|
||||
}
|
||||
|
||||
// PriorityStrSliceSort filters any elements matching fn to the end of the list.
// You can reverse the match result with a not to filter to the front instead!
// A copy of the list is returned, the original is not modified. The relative
// order of the elements within each of the two groups is preserved, and the
// returned list is never nil, even when the input is nil or empty.
func PriorityStrSliceSort(input []string, fn func(string) bool) []string {
	// the result always has exactly len(input) elements, so size it once
	output := make([]string, 0, len(input))
	found := []string{}
	for _, x := range input {
		if fn(x) { // if we find the key, don't include it just yet
			found = append(found, x) // save for later
			continue
		}
		output = append(output, x)
	}

	// include the keys at the end (if found)
	return append(output, found...)
}
|
||||
|
||||
// SortedStrSliceCompare takes two lists of strings and returns whether or not
|
||||
// they are equivalent. It will return nil if both sets contain the same
|
||||
// elements, regardless of order, and an error if they do not.
|
||||
|
||||
@@ -22,6 +22,7 @@ package util
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -1014,6 +1015,76 @@ func TestRemovePathPrefix0(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPriorityStrSliceSort0(t *testing.T) {
|
||||
in := []string{"foo", "bar", "baz"}
|
||||
ex := []string{"bar", "baz", "foo"}
|
||||
|
||||
fn := func(x string) bool {
|
||||
return x == "foo"
|
||||
}
|
||||
out := PriorityStrSliceSort(in, fn)
|
||||
|
||||
if !reflect.DeepEqual(ex, out) {
|
||||
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPriorityStrSliceSort1(t *testing.T) {
|
||||
in := []string{"foo", "bar", "baz"}
|
||||
ex := []string{"bar", "foo", "baz"}
|
||||
|
||||
fn := func(x string) bool {
|
||||
return x != "bar" // != brings this key to the front
|
||||
}
|
||||
out := PriorityStrSliceSort(in, fn)
|
||||
|
||||
if !reflect.DeepEqual(ex, out) {
|
||||
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPriorityStrSliceSort2(t *testing.T) {
|
||||
in := []string{"bar", "foo", "bar", "bar", "baz"}
|
||||
ex := []string{"foo", "baz", "bar", "bar", "bar"}
|
||||
|
||||
fn := func(x string) bool {
|
||||
return x == "bar"
|
||||
}
|
||||
out := PriorityStrSliceSort(in, fn)
|
||||
|
||||
if !reflect.DeepEqual(ex, out) {
|
||||
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPriorityStrSliceSort3(t *testing.T) {
|
||||
in := []string{"foo", "bar1", "bar2", "bar3", "baz"}
|
||||
ex := []string{"bar1", "bar2", "bar3", "foo", "baz"}
|
||||
|
||||
fn := func(x string) bool {
|
||||
return !strings.HasPrefix(x, "bar")
|
||||
}
|
||||
out := PriorityStrSliceSort(in, fn)
|
||||
|
||||
if !reflect.DeepEqual(ex, out) {
|
||||
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPriorityStrSliceSort4(t *testing.T) {
|
||||
in := []string{"foo", "bar1", "bar2", "bar3", "baz"}
|
||||
ex := []string{"foo", "baz", "bar1", "bar2", "bar3"}
|
||||
|
||||
fn := func(x string) bool {
|
||||
return strings.HasPrefix(x, "bar")
|
||||
}
|
||||
out := PriorityStrSliceSort(in, fn)
|
||||
|
||||
if !reflect.DeepEqual(ex, out) {
|
||||
t.Errorf("PriorityStrSliceSort expected: %v; got: %v.", ex, out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSortedStrSliceCompare0(t *testing.T) {
|
||||
slice0 := []string{"foo", "bar", "baz"}
|
||||
slice1 := []string{"bar", "foo", "baz"}
|
||||
|
||||
2
vendor/github.com/coreos/etcd
generated
vendored
2
vendor/github.com/coreos/etcd
generated
vendored
Submodule vendor/github.com/coreos/etcd updated: 27fc7e2296...d57e8b8d97
@@ -18,6 +18,7 @@
|
||||
package yamlgraph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
@@ -166,6 +167,10 @@ func (obj *GAPI) Next() chan gapi.Next {
|
||||
ch <- next
|
||||
return
|
||||
}
|
||||
// FIXME: add timeout to context
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
startChan := make(chan struct{}) // start signal
|
||||
close(startChan) // kick it off!
|
||||
|
||||
@@ -173,7 +178,16 @@ func (obj *GAPI) Next() chan gapi.Next {
|
||||
if obj.data.NoStreamWatch {
|
||||
watchChan = nil
|
||||
} else {
|
||||
watchChan = obj.data.World.ResWatch()
|
||||
var err error
|
||||
watchChan, err = obj.data.World.ResWatch(ctx)
|
||||
if err != nil {
|
||||
next := gapi.Next{
|
||||
Err: errwrap.Wrapf(err, "%s: could not start watch", Name),
|
||||
Exit: true, // exit, b/c programming error?
|
||||
}
|
||||
ch <- next
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
package yamlgraph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
@@ -168,6 +169,7 @@ func (obj *GraphConfig) Parse(data []byte) error {
|
||||
|
||||
// NewGraphFromConfig transforms a GraphConfig struct into a new graph.
|
||||
// FIXME: remove any possibly left over, now obsolete graph diff code from here!
|
||||
// TODO: add a timeout to replace context.TODO()
|
||||
func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World, noop bool) (*pgraph.Graph, error) {
|
||||
// hostname is the uuid for the host
|
||||
|
||||
@@ -224,7 +226,7 @@ func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World,
|
||||
}
|
||||
|
||||
// store in backend (usually etcd)
|
||||
if err := world.ResExport(resourceList); err != nil {
|
||||
if err := world.ResExport(context.TODO(), resourceList); err != nil {
|
||||
return nil, fmt.Errorf("Config: Could not export resources: %v", err)
|
||||
}
|
||||
|
||||
@@ -239,7 +241,7 @@ func (obj *GraphConfig) NewGraphFromConfig(hostname string, world engine.World,
|
||||
// database changes, we don't have a partial state of affairs...
|
||||
if len(kindFilter) > 0 { // if kindFilter is empty, don't need to do lookups!
|
||||
var err error
|
||||
resourceList, err = world.ResCollect(hostnameFilter, kindFilter)
|
||||
resourceList, err = world.ResCollect(context.TODO(), hostnameFilter, kindFilter)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Config: Could not collect resources: %v", err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user