etcd: Rewrite embed etcd implementation

This is a giant cleanup of the etcd code. The earlier version was
written when I was less experienced with golang.

This is still not perfect, and does contain some races, but at least
it's a decent base to start from. The automatic elastic clustering
should be considered an experimental feature. If you need a more
battle-tested cluster, then you should manage etcd manually and point
mgmt at your existing cluster.
James Shubin
2018-05-05 17:35:08 -04:00
parent fb275d9537
commit a5842a41b2
56 changed files with 5459 additions and 2654 deletions

etcd/chooser/chooser.go (new file, 98 lines)

@@ -0,0 +1,98 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package chooser

import (
	"context"

	"github.com/purpleidea/mgmt/etcd/interfaces"

	etcdtypes "github.com/coreos/etcd/pkg/types"
)

// Data represents the input data that is passed to the chooser.
type Data struct {
// Hostname is the hostname running this chooser instance. It can be
// used as a unique key in the cluster.
Hostname string // ourself
Debug bool
Logf func(format string, v ...interface{})
}
// Chooser represents the interface you must implement if you want to be able to
// control which cluster members are added and removed. Remember that this can
// get run from any peer (server) machine in the cluster, and that this may
// change as different leaders are elected! Do not assume any state will remain
// between invocations. If you want to maintain hysteresis or state, make sure
// to synchronize it in etcd.
type Chooser interface {
// Validate validates the chooser implementation to ensure the params
// represent a valid instantiation.
Validate() error
// Init initializes the chooser and passes in some useful data and
// handles.
Init(*Data) error
// Connect is called with an interfaces.Client that you can use, if
// necessary, to store some shared state between instances of this
// chooser and to watch for external changes. Sharing state between
// members should be avoided if possible, and there is no guarantee
// that your data won't be deleted in a disaster. There are no backups
// for this; regenerate anything you might need. Additionally, this
// client may only be used once Connect has been called, since Connect
// is only called after Init. This is however very useful for
// implementing special choosers. Since some operations can run on
// connect, this method gets a context. If you cancel this context,
// then you might expect that Watch could die too. Both of these should
// get cancelled if you call Disconnect.
Connect(context.Context, interfaces.Client) error // we get given a namespaced client
// Disconnect tells us to cancel our use of the client interface that we
// got from the Connect method. We must not return until we're done.
Disconnect() error
// Watch is called by the engine to allow us to watch for changes that
// might cause us to want to re-evaluate our nomination decision. It
// should error if it cannot start up. Once it is running, it should
// send a nil error on every event, and an error if things go wrong.
// When Disconnect is called, that should cause this to exit. When this
// sends events, Choose will usually eventually get called in response.
Watch() (chan error, error)
// Choose takes the current peer membership state, and the available
// volunteers, and produces a list of who we should add and who should
// quit. In general, it's best to only remove one member at a time, in
// particular because this will get called iteratively on future
// events, and it can remove subsequent members on the next iteration.
// One important note: when building a new cluster from one available
// volunteer and no members, we assume that this first volunteer is
// selected. Make sure that any implementation of this function does
// this as well, since otherwise the hardcoded initial assumption would
// be proven wrong here!
// TODO: we could pass in two lists of hostnames instead of the full
// URLsMap here, but let's keep the more complicated form for now just
// in case, and reduce it down later if needed...
// TODO: should we add a step arg here?
Choose(membership, volunteers etcdtypes.URLsMap) (nominees, quitters []string, err error)
// Close runs some cleanup routines in case there is anything that you'd
// like to free after we're done.
Close() error
}
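
To make the contract above concrete, here is a minimal sketch of an implementation that nominates every volunteer and never removes any member. This is hypothetical and not part of this commit; the staticChooser name and its fields are illustrative only.

// staticChooser is a hypothetical example chooser: it nominates every
// volunteer and never removes anyone. Illustrative only.
type staticChooser struct {
	done chan struct{} // closed on Disconnect to tear down Watch
}

func (obj *staticChooser) Validate() error { return nil }

func (obj *staticChooser) Init(data *Data) error {
	obj.done = make(chan struct{})
	return nil
}

func (obj *staticChooser) Connect(ctx context.Context, client interfaces.Client) error {
	return nil // no shared state to store
}

func (obj *staticChooser) Disconnect() error {
	close(obj.done) // cause Watch to exit
	return nil
}

func (obj *staticChooser) Watch() (chan error, error) {
	ch := make(chan error)
	go func() {
		defer close(ch)
		<-obj.done // we never generate events; wait for Disconnect
	}()
	return ch, nil
}

func (obj *staticChooser) Choose(membership, volunteers etcdtypes.URLsMap) ([]string, []string, error) {
	nominees := []string{}
	for hostname := range volunteers {
		if _, exists := membership[hostname]; !exists {
			nominees = append(nominees, hostname) // add anyone who volunteered
		}
	}
	return nominees, []string{}, nil // never remove anyone
}

func (obj *staticChooser) Close() error { return nil }

Note that with one volunteer and no members this returns that single volunteer, which satisfies the hardcoded initial-cluster assumption documented above.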

etcd/chooser/dynamicsize.go (new file, 285 lines)

@@ -0,0 +1,285 @@
// Mgmt
// Copyright (C) 2013-2019+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package chooser

import (
	"context"
	"fmt"
	"strconv"
	"sync"

	"github.com/purpleidea/mgmt/etcd/interfaces"

	etcd "github.com/coreos/etcd/clientv3"
	etcdtypes "github.com/coreos/etcd/pkg/types"
)

// XXX: Test causing cluster shutdowns with:
// ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 put /_mgmt/chooser/dynamicsize/idealclustersize 0
// It is currently broken.
const (
// DefaultIdealDynamicSize is the default target ideal dynamic cluster
// size used for the initial cluster.
DefaultIdealDynamicSize = 5
// IdealDynamicSizePath is the path key used for the chooser. It usually
// gets used with a namespace prefix.
IdealDynamicSizePath = "/dynamicsize/idealclustersize"
)
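
Note that the client this chooser receives in Connect is namespaced, so the full key that lands in etcd is the namespace prefix joined with this path. A quick sketch, assuming the /_mgmt/chooser prefix seen in the etcdctl example above:

prefix := "/_mgmt/chooser" // namespace prefix; assumed from the etcdctl example
key := prefix + IdealDynamicSizePath
// key == "/_mgmt/chooser/dynamicsize/idealclustersize"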
// DynamicSize is a simple implementation of the Chooser interface. This helps
// select which machines to add and remove as we elastically grow and shrink our
// cluster.
// TODO: think of a better name
type DynamicSize struct {
// IdealClusterSize is the ideal target size for this cluster. If it is
// set to zero, then it will use DefaultIdealDynamicSize as the value.
IdealClusterSize uint16
data *Data // save for later
client interfaces.Client
ctx context.Context
cancel func()
wg *sync.WaitGroup
}
// Validate validates the struct.
func (obj *DynamicSize) Validate() error {
// TODO: if changed to zero, treat as a cluster shutdown signal
// NOTE: IdealClusterSize is a uint16, so it can never be negative, and a
// zero value is replaced with the default in Init, so there is nothing
// to reject here at the moment.
return nil
}
// Init accepts some useful data and handles.
func (obj *DynamicSize) Init(data *Data) error {
if data.Hostname == "" {
return fmt.Errorf("can't Init with empty Hostname value")
}
if data.Logf == nil {
return fmt.Errorf("no Logf function was specified")
}
if obj.IdealClusterSize == 0 {
obj.IdealClusterSize = DefaultIdealDynamicSize
}
obj.data = data
obj.wg = &sync.WaitGroup{}
return nil
}
// Close runs some cleanup routines.
func (obj *DynamicSize) Close() error {
return nil
}
// Connect is called to accept an etcd.KV namespace that we can use.
func (obj *DynamicSize) Connect(ctx context.Context, client interfaces.Client) error {
obj.client = client
obj.ctx, obj.cancel = context.WithCancel(ctx)
size, err := DynamicSizeGet(obj.ctx, obj.client)
if err == interfaces.ErrNotExist || (err == nil && size <= 0) {
// unset (or invalid), so store our value in the running cluster
changed, err := DynamicSizeSet(obj.ctx, obj.client, obj.IdealClusterSize)
if err == nil && changed {
obj.data.Logf("set dynamic cluster size to: %d", obj.IdealClusterSize)
}
return err
} else if err == nil && size >= 1 {
// already set, so get it from the running cluster (use the valid cluster value)
if obj.IdealClusterSize != size {
obj.data.Logf("using dynamic cluster size of: %d", size)
}
obj.IdealClusterSize = size // get from existing cluster...
}
return err
}
// Disconnect is called to cancel our use of the etcd.KV connection.
func (obj *DynamicSize) Disconnect() error {
if obj.client != nil { // if connect was not called, don't call this...
obj.cancel()
}
obj.wg.Wait()
return nil
}
// Watch is called to send events anytime we might want to change membership. It
// is also used to watch for changes so that when we get an event, we know to
// honour the change in Choose.
func (obj *DynamicSize) Watch() (chan error, error) {
// NOTE: The body of this function is very similar to the logic in the
// simple client.Watcher implementation that wraps ComplexWatcher.
path := IdealDynamicSizePath
cancelCtx, cancel := context.WithCancel(obj.ctx)
info, err := obj.client.ComplexWatcher(cancelCtx, path)
if err != nil {
defer cancel()
return nil, err
}
ch := make(chan error)
obj.wg.Add(1) // hook in to global wait group
go func() {
defer obj.wg.Done()
defer close(ch)
defer cancel()
var data *interfaces.WatcherData
var ok bool
for {
select {
case data, ok = <-info.Events: // read
if !ok {
return
}
case <-cancelCtx.Done():
continue // wait for ch closure, but don't block
}
size := obj.IdealClusterSize
for _, event := range data.Events { // apply each event
if event.Type != etcd.EventTypePut {
continue
}
key := string(event.Kv.Key)
key = key[len(data.Path):] // remove path prefix
val := string(event.Kv.Value)
if val == "" {
continue // ignore empty values
}
i, err := strconv.Atoi(val)
if err != nil || i < 0 || i > int(^uint16(0)) {
continue // ignore bad or out-of-range values
}
size = uint16(i) // save
}
if size == obj.IdealClusterSize {
continue // no change
}
// set before sending the signal
obj.IdealClusterSize = size
if size == 0 { // zero means shutdown
obj.data.Logf("impending cluster shutdown...")
} else {
obj.data.Logf("got new dynamic cluster size of: %d", size)
}
select {
case ch <- data.Err: // send (might be nil!)
case <-cancelCtx.Done():
continue // wait for ch closure, but don't block
}
}
}()
return ch, nil
}
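
For reference, a consumer of this Watch method might drain the channel like so. This is a hedged sketch of the engine side, not code from this commit; the chooser variable is assumed:

ch, err := chooser.Watch()
if err != nil {
	return err // the watch could not start up
}
for event := range ch { // the channel closes after Disconnect is called
	if event != nil {
		return event // the watch hit an error
	}
	// a nil error is an event: fetch fresh membership and re-run Choose
}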
// Choose accepts a list of current members, and a list of volunteers. From
// that we can decide who we should add and remove. We return the lists of
// nominated and unnominated hostnames, respectively.
func (obj *DynamicSize) Choose(membership, volunteers etcdtypes.URLsMap) ([]string, []string, error) {
// Possible nominees include anyone that has volunteered, but that
// isn't a member.
if obj.data.Debug {
obj.data.Logf("goal: %d members", obj.IdealClusterSize)
}
nominees := []string{}
for hostname := range volunteers {
if _, exists := membership[hostname]; !exists {
nominees = append(nominees, hostname)
}
}
// Possible quitters include anyone that is a member, but that is not a
// volunteer. (They must have unvolunteered.)
quitters := []string{}
for hostname := range membership {
if _, exists := volunteers[hostname]; !exists {
quitters = append(quitters, hostname)
}
}
// What we want to know...
nominated := []string{}
unnominated := []string{}
// We should always only add ONE member at a time!
// TODO: is it okay to remove multiple members at the same time?
if len(nominees) > 0 && len(membership)-len(quitters) < int(obj.IdealClusterSize) {
//unnominated = []string{} // only do one operation at a time
nominated = []string{nominees[0]} // FIXME: use a better picker algorithm
} else if len(quitters) == 0 && len(membership) > int(obj.IdealClusterSize) { // too many members
//nominated = []string{} // only do one operation at a time
for kicked := range membership {
// don't kick ourself unless we are the only one left...
if kicked != obj.data.Hostname || (obj.IdealClusterSize == 0 && len(membership) == 1) {
unnominated = []string{kicked} // FIXME: use a better picker algorithm
break
}
}
} else if len(quitters) > 0 { // must do these before new unvolunteers
unnominated = quitters // get rid of the quitters
}
return nominated, unnominated, nil // perform these changes
}
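
To illustrate the decision logic, here is a hypothetical walk-through. The hostnames and URLs are invented; NewURLsMap is the parser from the already-imported etcd types package:

// two current members, three volunteers, and an IdealClusterSize of three...
membership, _ := etcdtypes.NewURLsMap("h1=http://192.0.2.1:2380,h2=http://192.0.2.2:2380")
volunteers, _ := etcdtypes.NewURLsMap("h1=http://192.0.2.1:2380,h2=http://192.0.2.2:2380,h3=http://192.0.2.3:2380")
nominated, unnominated, err := obj.Choose(membership, volunteers)
// we're below the ideal size, so we expect exactly one addition:
// nominated == []string{"h3"}, unnominated == []string{}, err == nil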
// DynamicSizeGet gets the dynamic cluster size that is currently set in the
// cluster.
func DynamicSizeGet(ctx context.Context, client interfaces.Client) (uint16, error) {
key := IdealDynamicSizePath
m, err := client.Get(ctx, key) // (map[string]string, error)
if err != nil {
return 0, err
}
val, exists := m[IdealDynamicSizePath]
if !exists {
return 0, interfaces.ErrNotExist
}
i, err := strconv.Atoi(val)
if err != nil || i < 0 || i > int(^uint16(0)) {
return 0, fmt.Errorf("bad value: %s", val)
}
return uint16(i), nil
}
// DynamicSizeSet sets the dynamic size in the cluster. It returns true if it
// changed or set the value.
func DynamicSizeSet(ctx context.Context, client interfaces.Client, size uint16) (bool, error) {
key := IdealDynamicSizePath
val := strconv.FormatUint(uint64(size), 10) // fmt.Sprintf("%d", size)
ifCmps := []etcd.Cmp{
etcd.Compare(etcd.Value(key), "=", val), // desired state
}
elseOps := []etcd.Op{etcd.OpPut(key, val)}
resp, err := client.Txn(ctx, ifCmps, nil, elseOps)
if err != nil {
return false, err
}
// Succeeded is true if the compare evaluated to true, meaning the value
// was already what we wanted; if it evaluated to false, then the else
// ops ran and wrote the new value, which means we changed something.
changed := !resp.Succeeded
return changed, err
}
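
Taken together, these two helpers can resize a running cluster from any connected client. A small hypothetical sketch; the resize function is not part of this commit, and it assumes you already hold a client namespaced to the chooser prefix:

func resize(ctx context.Context, client interfaces.Client, size uint16) error {
	old, err := DynamicSizeGet(ctx, client)
	if err != nil && err != interfaces.ErrNotExist {
		return err // a real failure; a missing key is fine, we'll create it
	}
	changed, err := DynamicSizeSet(ctx, client, size)
	if err != nil {
		return err
	}
	if changed {
		fmt.Printf("dynamic cluster size: %d -> %d\n", old, size)
	}
	return nil
}

Any running DynamicSize chooser watching this key would then receive an event and re-evaluate membership in Choose.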