Files
mgmt/engine/resources/hetzner_vm.go
James Shubin c598e4d289 engine, etcd: Update code for latest gofmt fixes
Latest version of golang broken gofmt again...
2023-03-14 16:43:08 -04:00

1228 lines
49 KiB
Go

// Mgmt
// Copyright (C) 2013-2023+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package resources
import (
"context"
"fmt"
"time"
"github.com/purpleidea/mgmt/engine"
"github.com/purpleidea/mgmt/engine/traits"
"github.com/purpleidea/mgmt/util/errwrap"
"github.com/hetznercloud/hcloud-go/hcloud"
)
// Values accepted by the "state" param of the hetzner:vm resource.
const (
	// HetznerStateUndefined is the default and leaves the state undefined.
	// An undefined state is always treated as converged. Changes to other
	// params are only applied while the server is in a state compatible with
	// the operations needed to make that change.
	HetznerStateUndefined = ""

	// HetznerStateExists requires the server to exist, whether it is "off",
	// "running" or in a transient state. An absent server is created in the
	// "off" state, with one exception: if the last observed state before a
	// rebuild was "running" or "starting", rebuildServer will set the new
	// server to "running".
	HetznerStateExists = "exists"

	// HetznerStateRunning requires the server to be powered on. An absent
	// server is created in the "running" state.
	HetznerStateRunning = "running"

	// HetznerStateOff requires the server to be powered off. An absent server
	// is created in the "off" state.
	HetznerStateOff = "off"

	// HetznerStateAbsent requires the server to be deleted/absent. An
	// existing server is deleted. Note that this deletion is always executed
	// if the "absent" state is explicitly specified!
	HetznerStateAbsent = "absent"
)

// Values accepted by the "allowrebuild" param.
const (
	// HetznerAllowRebuildError is the default: any rebuild request raised in
	// CheckApply is blocked and reported as an error. Rebuild requests occur
	// when other resource params need a destructive rebuild to converge, so
	// erroring by default prevents unexpected server deletions.
	HetznerAllowRebuildError = ""

	// HetznerAllowRebuildIgnore blocks rebuild requests in CheckApply as
	// well, but without raising an error. CheckApply must skip the rebuild
	// and continue with further steps if possible. Use this option to prevent
	// unexpected server deletions without disrupting the mcl script.
	HetznerAllowRebuildIgnore = "ignore"

	// HetznerAllowRebuildIfNeeded permits server rebuilds within CheckApply.
	// This is needed when the specified serverspecs are not (yet) aligned
	// with the active instance. Use this option only if you are sure that you
	// are not destroying any critical data or services!
	HetznerAllowRebuildIfNeeded = "ifneeded"
)

// Values accepted by the "serverrescuemode" param.
const (
	// HetznerServerRescueDisabled is the default and disables rescue mode.
	HetznerServerRescueDisabled = ""

	// HetznerServerRescueTypeLinux32 enables rescue mode with a linux32
	// image type.
	HetznerServerRescueTypeLinux32 = "linux32"

	// HetznerServerRescueTypeLinux64 enables rescue mode with a linux64
	// image type.
	HetznerServerRescueTypeLinux64 = "linux64"

	// HetznerServerRescueTypeFreeBSD64 enables rescue mode with a freebsd64
	// image type.
	HetznerServerRescueTypeFreeBSD64 = "freebsd64"
)

// Timing limits and defaults, all expressed in seconds.
const (
	// HetznerPollLimit is the lower limit on the polling interval in seconds.
	// The Hetzner API supports up to 3600 requests per hour, so this limit is
	// set to prevent rate limit errors in long term operation.
	// NOTE: polling the same Hetzner project from multiple clients requires a
	// larger polling interval to prevent the same rate limit error, since all
	// of these requests add to the query count of the shared project. It is
	// recommended to use a polling interval of at least N seconds, with N the
	// number of active hetzner:vm instances of the same project.
	// NOTE: high rates of change to other params require additional API
	// queries at CheckApply. Increase the polling interval again to prevent
	// rate limit errors if frequent updates are expected.
	HetznerPollLimit = 1

	// HetznerWaitIntervalLimit is the lower limit on wait intervals in
	// seconds. High request rates are allowed, but risk causing rate limit
	// errors.
	HetznerWaitIntervalLimit = 0

	// HetznerWaitIntervalDefault is the default wait interval in seconds.
	// NOTE: use larger intervals when using many resources under the same
	// Hetzner project, or when expecting consistently high rates of change to
	// other resource parameters.
	HetznerWaitIntervalDefault = 5

	// HetznerWaitTimeoutDefault is the default timeout limit in seconds
	// (five minutes).
	HetznerWaitTimeoutDefault = 5 * 60
)
// init registers the "hetzner:vm" resource kind with the engine, together with
// a constructor that hands out an empty HetznerVMRes.
func init() {
	newRes := func() engine.Res {
		return &HetznerVMRes{}
	}
	engine.RegisterResource("hetzner:vm", newRes)
}
// HetznerVMRes is a Hetzner cloud resource (1). It connects with the cloud API
// using the hcloud-go package provided by Hetzner. The API token for a new
// project must be generated manually, via the cloud console (2), before this
// resource can establish a connection with the API. One Hetzner resource
// represents one server instance, and multiple instances can be registered
// under the same project. A resource in the "absent" state only exists as a
// local mcl struct, and does not exist as server instance on Hetzner's side.
// NOTE: the Hetzner cloud console must be used to create a new project,
// generate the corresponding API token, and initialize the desired SSH keys.
// All registered SSH keys are used when creating a server, and a subset of
// those can be enabled for rescue mode via the "serverrescuekeys" param.
// NOTE: complete and up-to-date serverconfig options must be requested from the
// Hetzner API, but hcloud-go-getopts (3) provides a static reference.
// NOTE: this resource requires polling, via the "Meta:poll" param. The Hetzner
// API imposes a maximum rate of 3600 requests per hour that must be taken into
// account for intensive and/or long term operations. When running N hetzner:vm
// resources under the same Hetzner project, it is recommended to use a polling
// interval of at least N seconds. High rates of change to other params will
// require additional API requests at CheckApply. When frequent param updates
// are expected for long term operations, it is recommended to increase the
// polling interval again to prevent rate limit errors.
// NOTE: running multiple concurrent mcl scripts on the same resource might
// cause unexpected behavior in the API or the resource state. Use with care.
// TODO: build tests for hetzner:vm? But hcloud-go has no mocking package.
// 1) https://docs.hetzner.cloud/
// 2) https://console.hetzner.cloud/
// 3) https://github.com/jefmasereel/hcloud-go-getopts
type HetznerVMRes struct {
traits.Base
// init holds the engine handle that is saved by Init. It is used for
// logging and for reading the debug flag.
init *engine.Init
// APIToken specifies the unique API token corresponding to a Hetzner
// project. Keep this token private! It provides full access to this
// project, so a leaked token will be vulnerable to abuse. Read it from
// a local file or the mgmt deploy, or provide it directly as a string.
// NOTE: It must be generated manually via https://console.hetzner.cloud/.
// NOTE: This token is usually a 64 character alphanumeric string.
APIToken string `lang:"apitoken"`
// State specifies the desired state of the server instance. The supported
// options are "" (undefined), "absent", "exists", "off" and "running".
// HetznerStateUndefined ("") leaves the state undefined by default.
// HetznerStateExists ("exists") indicates that the server must exist.
// HetznerStateAbsent ("absent") indicates that the server must not exist.
// HetznerStateRunning ("running") tells the server it must be powered on.
// HetznerStateOff ("off") tells the server it must be powered off.
// NOTE: any other inputs will not pass Validate and result in an error.
// NOTE: setting the state of a live server to "absent" will delete all data
// and services that are located on that instance! Use with caution.
State string `lang:"state"`
// AllowRebuild provides flexible protection against unexpected server
// rebuilds. Any changes to the "servertype", "datacenter" or "image" params
// require a destructive rebuild, which deletes all data on that server.
// The user must explicitly allow these operations with AllowRebuild.
// Choose from three options: "ifneeded" allows all rebuilds that are needed
// by CheckApply to meet the specified params. "ignore" disables these
// rebuilds, but continues without error. The default option ("") always
// returns an error when CheckApply requests a rebuild.
// NOTE: Soft updates related to power and rescue mode are always allowed,
// because they are only required for explicit changes to resource fields.
// TODO: add AllowReboot if any indirect poweroffs are ever implemented.
AllowRebuild string `lang:"allowrebuild"`
// ServerType determines the machine type as defined by Hetzner. A complete
// and up-to-date list of options must be requested from the Hetzner API,
// but hcloud-go-getopts (url below) provides a static reference. Basic
// servertype options include "cx11", "cx21", "cx31" etc.
// NOTE: make sure to check the price of the selected servertype! The listed
// examples are usually very cheap, but never free. Price and availability
// can also be dependent on the selected datacenter.
// https://github.com/JefMasereel/hcloud-go-getopts/
// TODO: set some kind of cost-based protection policy?
ServerType string `lang:"servertype"`
// Datacenter determines where the resource is hosted. A complete and
// up-to-date list of options must be requested from the Hetzner API, but
// hcloud-go-getopts (url below) provides a static reference. The datacenter
// options include "nbg1-dc3", "fsn1-dc14", "hel1-dc2" etc.
// https://github.com/JefMasereel/hcloud-go-getopts/
Datacenter string `lang:"datacenter"`
// Image determines the operating system to be installed. A complete and
// up-to-date list of options must be requested from the Hetzner API, but
// hcloud-go-getopts (url below) provides a static reference. The image type
// options include "centos-7", "ubuntu-18.04", "debian-10" etc.
// https://github.com/JefMasereel/hcloud-go-getopts/
Image string `lang:"image"`
// UserData can be used to run commands on the server instance at creation.
// NOTE(review): the link below points at the AWS documentation of the
// user-data concept; confirm the exact format that Hetzner expects.
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html.
UserData string `lang:"userdata"`
// ServerRescueMode specifies the image type used when enabling rescue mode.
// The supported image types are "linux32", "linux64" and "freebsd64".
// Alternatively, leave this string empty to disable rescue mode (default).
// Other input values will not pass Validate and result in an error.
// NOTE: rescue mode can not be enabled if the server is absent.
// NOTE: Rescue mode can be used to log into the server over SSH and access
// the disks when the normal OS has trouble booting on its own.
ServerRescueMode string `lang:"serverrescuemode"`
// ServerRescueSSHKeys can be used to select a subset of keys that should be
// enabled for rescue mode operations over SSH. From all SSH keys known to
// the project client, choose a subset of keys by name, as an array of
// strings. New keys must first be added manually via the cloud console.
// An error is thrown if a given keyname is not recognized by the client.
// NOTE: live changes to this keylist while rescue mode is already enabled
// are not (yet) detected or applied by CheckApply.
// TODO: improve ssh key handling at checkApplyRescueMode and rebuildServer.
ServerRescueSSHKeys []string `lang:"serverrescuekeys"`
// WaitInterval is the interval in seconds that is used when waiting for
// transient states to converge between intermediate operations. A zero
// value causes the waiter to run without delays (burst requests). Although
// such burst requests are allowed, it is recommended to use a wait interval
// that keeps the total request rate under 3600 requests per hour. Take
// these factors into account: polling rate "Meta:poll", number of active
// resources under the same Hetzner project, and the expected rate of param
// updates. This will help to prevent rate limit errors.
WaitInterval uint32 `lang:"waitinterval"`
// WaitTimeout will cancel wait loops if they do not exit cleanly before
// the expected time in seconds, in order to detect defective loops and
// avoid unnecessary consumption of computational resources.
WaitTimeout uint32 `lang:"waittimeout"`
// client is required for hcloud-go to interact with the Hetzner API. It is
// created by Init and cleared again by Close.
client *hcloud.Client
// server is a local copy of the server object returned by hcloud-go. If
// this is nil, the server is considered to be absent. Otherwise, this
// struct describes the properties of the server instance as registered with
// Hetzner at the time of the update request.
server *hcloud.Server
// serverconfig is a local copy of the serverCreateOpts struct generated
// with hcloud-go. This struct is dependent on the ServerType, Datacenter,
// Image and State params. These must be chosen from the valid options
// provided by Hetzner, see details on https://docs.hetzner.cloud/.
serverconfig hcloud.ServerCreateOpts
// lastObservedState is a local copy of the last observed state of the
// resource. This is used to determine the startAfterCreate option during
// server rebuilds when the state is "" (undefined).
lastObservedState hcloud.ServerStatus
// rescueKeys is a local copy of the array of SSH key values to be enabled
// in rescue mode, after formatting for direct use with hcloud-go.
rescueKeys []*hcloud.SSHKey
// rescueImage is a local copy of the image type used when rescue mode was
// enabled the last time, to give checkApplyRescueMode a static reference.
rescueImage hcloud.ServerRescueType
}
// Default builds a fresh resource that is pre-filled with conservative values:
// an undefined state, rebuilds blocked with an error, and the default wait
// interval and timeout. All other fields keep their zero value.
func (obj *HetznerVMRes) Default() engine.Res {
	res := &HetznerVMRes{}
	res.State = HetznerStateUndefined
	res.AllowRebuild = HetznerAllowRebuildError
	res.WaitInterval = HetznerWaitIntervalDefault
	res.WaitTimeout = HetznerWaitTimeoutDefault
	return res
}
// Validate reports an error as soon as one of the user supplied params holds a
// value that this resource does not accept. The checks run in a fixed order:
// API token, state, allowrebuild, serverrescuemode, and finally the polling and
// wait intervals. It returns nil when every check passes.
func (obj *HetznerVMRes) Validate() error {
	// A token is mandatory, nothing can be requested from the API without it.
	if len(obj.APIToken) == 0 {
		return fmt.Errorf("empty token string")
	}

	// The state is either left undefined (default), only required to exist,
	// or pinned to one of the well defined steady states.
	stateOK := obj.State == HetznerStateUndefined ||
		obj.State == HetznerStateExists ||
		obj.State == HetznerStateRunning ||
		obj.State == HetznerStateOff ||
		obj.State == HetznerStateAbsent
	if !stateOK {
		return fmt.Errorf("invalid state: %s", obj.State)
	}

	// The rebuild policy must be one of the three known options.
	rebuildOK := obj.AllowRebuild == HetznerAllowRebuildError ||
		obj.AllowRebuild == HetznerAllowRebuildIgnore ||
		obj.AllowRebuild == HetznerAllowRebuildIfNeeded
	if !rebuildOK {
		return fmt.Errorf("invalid allowrebuild: %s", obj.AllowRebuild)
	}

	// Rescue mode is either disabled, or uses one of the known image types.
	rescueOK := obj.ServerRescueMode == HetznerServerRescueDisabled ||
		obj.ServerRescueMode == HetznerServerRescueTypeLinux32 ||
		obj.ServerRescueMode == HetznerServerRescueTypeLinux64 ||
		obj.ServerRescueMode == HetznerServerRescueTypeFreeBSD64
	if !rescueOK {
		return fmt.Errorf("invalid serverrescuemode: %s", obj.ServerRescueMode)
	}

	// Both timing params have an explicit lower limit.
	if poll := obj.MetaParams().Poll; poll < HetznerPollLimit {
		return fmt.Errorf("invalid polling interval (minimum %d s)", HetznerPollLimit)
	}
	if interval := obj.WaitInterval; interval < HetznerWaitIntervalLimit {
		return fmt.Errorf("invalid wait interval (minimum %d)", HetznerWaitIntervalLimit)
	}

	return nil
}
// Init stores the engine handle, creates the hcloud-go client from the API
// token, and logs a set of warnings about the active rebuild policy, the late
// validation of serverconfig options, and the timing limits. It always returns
// nil.
func (obj *HetznerVMRes) Init(init *engine.Init) error {
	// Keep the engine handle around, it is needed for logging later on.
	obj.init = init
	logf := obj.init.Logf

	// Build the hcloud-go client, identified by program name and version.
	withToken := hcloud.WithToken(obj.APIToken)
	withApplication := hcloud.WithApplication(obj.init.Program, obj.init.Version)
	// TODO: hcloud.WithEndpoint(),
	// TODO: hcloud.WithDebugWriter(),
	obj.client = hcloud.NewClient(withToken, withApplication)

	// Tell the user how rebuild requests are going to be handled.
	if obj.AllowRebuild == HetznerAllowRebuildError {
		logf("warning: server rebuild requests will be blocked with error")
	} else if obj.AllowRebuild == HetznerAllowRebuildIgnore {
		logf("warning: server rebuild requests will be skipped without error")
	} else if obj.AllowRebuild == HetznerAllowRebuildIfNeeded {
		logf("warning: server rebuild requests will be applied without error")
	}

	// The serverconfig options can not be checked before CheckApply runs.
	logf("warning: serverconfig options will only be validated during checkapply")

	// Remind the user of the lower limits on both timing params.
	logf("warning: Meta:poll must always be greater or equal than %d seconds", HetznerPollLimit)
	logf("warning: waitinterval must always be greater or equal than %d seconds", HetznerWaitIntervalLimit)

	return nil
}
// Close drops the authentication info held by this resource: the client is
// released and the API token is blanked. It always returns nil.
func (obj *HetznerVMRes) Close() error {
	obj.client = nil
	obj.APIToken = ""
	return nil
}
// Watch always fails for this resource. The Hetzner API does not provide any
// event streams, so the resource must be driven by the poll metaparam instead.
// NOTE: HetznerPollLimit sets an explicit minimum on the polling interval.
func (obj *HetznerVMRes) Watch() error {
	err := fmt.Errorf("invalid Watch call: requires poll metaparam")
	return err
}
// CheckApply refreshes the local server info and then runs three checks in
// order: server state, server rebuild and rescue mode. Changes are only pushed
// when apply is true. It returns true only if none of the checks reported a
// required change, and false otherwise, regardless of the apply value. Errors
// from any step are wrapped and returned.
// NOTE: all functions that push changes to the Hetzner instance run a waitUntil
// call with the appropriate exit condition before returning, such that the
// requested operation is confirmed before continuing. This ensures that the
// "server" struct always contains up-to-date info of the live instance.
// NOTE: this last assumption might still fail in case the same resource
// instance is managed by multiple running mgmt instances!
// TODO: possible to ensure safe concurrency?
func (obj *HetznerVMRes) CheckApply(apply bool) (bool, error) {
	ctx := context.TODO()

	// Start from up-to-date server info.
	if err := obj.getServerUpdate(ctx); err != nil {
		return false, errwrap.Wrapf(err, "getServerUpdate failed")
	}

	// Step 1: bring the server into the requested state, if it is not there
	// yet. An undefined state always passes this step.
	stateOK, err := obj.checkApplyServerState(ctx, apply)
	if err != nil {
		return false, errwrap.Wrapf(err, "checkApplyServerState failed")
	}

	// Stop here if the requested state was not reached, since the remaining
	// steps would be pointless. An undefined state passes this gate as well.
	converged, err := obj.serverStateConverged()
	if err != nil {
		return false, errwrap.Wrapf(err, "serverStateConverged failed")
	}
	if !converged {
		return false, nil
	}

	// Step 2: changes in cpu, location and/or image need a server rebuild.
	// These are only applied if the server exists.
	rebuildOK, err := obj.checkApplyServerRebuild(ctx, apply)
	if err != nil {
		return false, errwrap.Wrapf(err, "checkApplyServerRebuild failed")
	}

	// Step 3: rescue mode can be changed without a destructive rebuild.
	// These changes are only applied if the server is running.
	rescueOK, err := obj.checkApplyRescueMode(ctx, apply)
	if err != nil {
		return false, errwrap.Wrapf(err, "checkApplyRescueMode failed")
	}

	// Converged only if not a single step asked for changes.
	return stateOK && rebuildOK && rescueOK, nil
}
// checkApplyServerState drives the server towards the state requested by the
// State param. An undefined state returns (true, nil) right away. Otherwise
// the server is created if it is missing, and then powered on, powered off or
// deleted as needed. Nothing is changed when the state has already converged.
// NOTE: the output arguments follow the rules of CheckApply: the bool is false
// whenever a change was (or would be) required, regardless of the apply value,
// and true otherwise. Any errors that might occur are wrapped and returned.
func (obj *HetznerVMRes) checkApplyServerState(ctx context.Context, apply bool) (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("checkApplyServerState(apply: %t)", apply)
	}

	// Nothing to enforce for an undefined state.
	if obj.State == HetznerStateUndefined {
		return true, nil
	}

	// created remembers whether this call had to create the server, in which
	// case the result can never be "already converged".
	created := false
	if obj.server == nil {
		// Absent as requested, so there is nothing left to do.
		if obj.State == HetznerStateAbsent {
			return true, nil
		}
		// The server should exist, but it doesn't (yet).
		if !apply {
			return false, nil
		}
		err := obj.createServer(ctx)
		if err != nil {
			return false, errwrap.Wrapf(err, "createServer failed")
		}
		created = true
	}

	// Mere existence is all that "exists" asks for.
	if obj.State == HetznerStateExists {
		return !created, nil
	}

	// The remaining states can only be judged from a steady state.
	err := obj.waitUntil(ctx, obj.serverInSteadyState)
	if err != nil {
		return false, errwrap.Wrapf(err, "waitUntil(serverInSteadyState) exited early")
	}

	converged, err := obj.serverStateConverged()
	if err != nil {
		return false, errwrap.Wrapf(err, "serverStateConverged failed")
	}
	if converged {
		// Converged, but report a change if the server was just created.
		return !created, nil
	}

	// The server sits in a steady state, but not the requested one.
	if !apply {
		return false, nil
	}

	// Push the change that leads to the requested state.
	if obj.State == HetznerStateRunning {
		if err := obj.powerServerOn(ctx); err != nil {
			return false, errwrap.Wrapf(err, "powerServerOn failed")
		}
		return false, nil
	}
	if obj.State == HetznerStateOff {
		if err := obj.powerServerOff(ctx); err != nil {
			return false, errwrap.Wrapf(err, "powerServerOff failed")
		}
		return false, nil
	}
	if obj.State == HetznerStateAbsent {
		if err := obj.deleteServer(ctx); err != nil {
			return false, errwrap.Wrapf(err, "deleteServer failed")
		}
		return false, nil
	}

	return false, fmt.Errorf("invalid state: %s", obj.State)
}
// checkApplyServerRebuild checks the servertype, datacenter and image values of
// the live instance, and tries to rebuild the server when that is required to
// match the specified params.
// An absent server has nothing to rebuild and is reported as converged. This
// function only runs after CheckApply has confirmed that the server state has
// converged, so an absent server at this point is either requested ("absent")
// or tolerated (undefined state). Reporting it as unconverged would make
// CheckApply return false on every poll for such resources.
// NOTE: AllowRebuild protects the user against unexpected server deletions.
// NOTE: the output arguments follow the rules of CheckApply: If the resource
// requires changes, CheckApply returns false regardless of the apply value,
// true otherwise. Any errors that might occur are wrapped and returned.
func (obj *HetznerVMRes) checkApplyServerRebuild(ctx context.Context, apply bool) (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("checkApplyServerRebuild(apply: %t)", apply)
	}

	// Exit immediately if the server does not exist: no specs to compare and
	// nothing to rebuild, so no change is required.
	if obj.server == nil {
		return true, nil
	}

	// Compare ServerType, Datacenter and Image params.
	specsOK, err := obj.cmpServerSpecs()
	if err != nil {
		return false, errwrap.Wrapf(err, "cmpServerSpecs failed")
	}
	if specsOK {
		return true, nil
	}
	if !apply {
		return false, nil
	}

	// Rebuild the server to meet specs (if AllowRebuild passes).
	// NOTE: if "undefined", this tries to match the last observed state.
	if err := obj.rebuildServer(ctx); err != nil {
		return false, errwrap.Wrapf(err, "rebuildServer failed")
	}

	// A rebuild was requested, so the resource was not converged.
	return false, nil
}
// checkApplyRescueMode verifies that rescue mode is enabled or disabled as
// requested by ServerRescueMode, and switches it when needed. A change always
// disables rescue mode first, and then enables it again if an image type was
// specified. When enabling rescue mode, the SSH keys specified by
// ServerRescueSSHKeys are validated and enabled for rescue login over SSH.
// NOTE: rescue mode changes require steady state ("off" or "running").
// NOTE: the output arguments follow the rules of CheckApply: the bool is false
// whenever a change was (or would be) required, regardless of the apply value,
// and true otherwise. Any errors that might occur are wrapped and returned.
// NOTE: switching image type in ServerRescueMode triggers this checkapply, but
// dynamic changes to the SSH keys are not yet supported.
// TODO: add "undefined" option for HetznerServerRescueMode? default?
// TODO: add support for rescue login via root password?
func (obj *HetznerVMRes) checkApplyRescueMode(ctx context.Context, apply bool) (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("checkApplyRescueMode(apply: %t)", apply)
	}

	// An absent server counts as rescue mode "disabled", so it is converged
	// only if rescue mode is not requested.
	if obj.server == nil {
		return obj.ServerRescueMode == HetznerServerRescueDisabled, nil
	}

	// Rescue mode can only be switched in a steady state ("running" or
	// "off"), otherwise the "server is locked" for enabling or disabling.
	steady, err := obj.serverInSteadyState()
	if err != nil {
		return false, errwrap.Wrapf(err, "serverInSteadyState failed")
	}
	if !steady {
		return false, nil
	}

	// Done if rescue mode already matches, or if changes are not allowed.
	// TODO: add check for ssh keys? Only checking rescueImage.
	converged, err := obj.rescueModeConverged()
	if err != nil {
		return false, errwrap.Wrapf(err, "rescueModeConverged failed")
	}
	if converged || !apply {
		return converged, nil
	}

	// Always disable first: either to match the specs, or to be able to
	// enable it again with a new image type.
	if err := obj.disableRescueMode(ctx); err != nil {
		return false, errwrap.Wrapf(err, "disableRescueMode failed")
	}

	// Nothing more to do if rescue mode should stay disabled.
	if obj.ServerRescueMode == HetznerServerRescueDisabled {
		return false, nil
	}
	if err := obj.enableRescueMode(ctx); err != nil {
		return false, errwrap.Wrapf(err, "enableRescueMode failed")
	}
	return false, nil
}
// getServerUpdate asks the Hetzner API for the server that carries the name of
// this resource, and stores the result in obj.server. The stored value is left
// untouched if the request fails, in which case the wrapped error is returned.
// NOTE: if obj.server is nil, the server is considered to be in "absent" state.
func (obj *HetznerVMRes) getServerUpdate(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("getServerUpdate()")
	}

	srv, _, err := obj.client.Server.GetByName(ctx, obj.Name())
	if err == nil {
		obj.server = srv
		return nil
	}
	return errwrap.Wrapf(err, "failed serverupdate request")
}
// Cmp compares two resource structs. It returns nil if both resources carry
// the same public params, otherwise it returns an error that names the first
// param found to differ. It also errors if exactly one of the two resources is
// nil, or if r is not a *HetznerVMRes.
func (obj *HetznerVMRes) Cmp(r engine.Res) error {
	// check if empty
	if obj == nil && r == nil {
		return nil
	}
	if (obj == nil) != (r == nil) {
		return fmt.Errorf("one resource is empty")
	}

	// compare types
	res, ok := r.(*HetznerVMRes)
	if !ok {
		return fmt.Errorf("not a %s", obj.Kind())
	}

	// compare resource fields
	if obj.APIToken != res.APIToken {
		return fmt.Errorf("apitoken differs")
	}
	if obj.State != res.State {
		return fmt.Errorf("state differs")
	}
	if obj.AllowRebuild != res.AllowRebuild {
		return fmt.Errorf("allowrebuild differs")
	}
	if obj.ServerType != res.ServerType {
		return fmt.Errorf("servertype differs")
	}
	if obj.Datacenter != res.Datacenter {
		return fmt.Errorf("datacenter differs")
	}
	if obj.Image != res.Image {
		return fmt.Errorf("image differs")
	}
	if obj.UserData != res.UserData {
		return fmt.Errorf("userdata differs")
	}
	if obj.ServerRescueMode != res.ServerRescueMode {
		return fmt.Errorf("serverrescuemode differs")
	}

	// The key lists must have the same length before they are compared by
	// index. Without this check a shorter list on the other side would cause
	// an out of range panic, and a longer one would go unnoticed.
	// TODO: more robust comparison of keylists (order is still significant)
	if len(obj.ServerRescueSSHKeys) != len(res.ServerRescueSSHKeys) {
		return fmt.Errorf("serverrescuekeys differ")
	}
	for i, key := range obj.ServerRescueSSHKeys {
		if key != res.ServerRescueSSHKeys[i] {
			return fmt.Errorf("serverrescuekeys differ")
		}
	}

	if obj.WaitInterval != res.WaitInterval {
		return fmt.Errorf("waitinterval differs")
	}
	if obj.WaitTimeout != res.WaitTimeout {
		return fmt.Errorf("waittimeout differs")
	}

	return nil
}
// cmpServerSpecs compares the server specifications between the local mcl
// struct HetznerVMRes and the corresponding server instance. Returns true if
// ServerType, Datacenter and Image match. Returns an error if the server is
// absent.
// NOTE: the server object refers to its servertype, datacenter and image by
// pointer. A missing reference (for example a server without an image) can not
// match the requested name, so it is reported as a mismatch instead of being
// dereferenced, which would panic.
func (obj *HetznerVMRes) cmpServerSpecs() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("cmpServerSpecs()")
	}

	srv := obj.server
	if srv == nil {
		return false, fmt.Errorf("server is unavailable")
	}

	if srv.ServerType == nil || obj.ServerType != srv.ServerType.Name {
		return false, nil
	}
	if srv.Datacenter == nil || obj.Datacenter != srv.Datacenter.Name {
		return false, nil
	}
	if srv.Image == nil || obj.Image != srv.Image.Name {
		return false, nil
	}

	return true, nil
}
// powerServerOn asks the API to power on the server, and then blocks in
// waitUntil until the "running" state is reported. It returns an error if the
// server is absent, if the poweron request fails, or if waitUntil exits early
// due to timeout, context cancellation or another error.
func (obj *HetznerVMRes) powerServerOn(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("powerServerOn()")
	}

	// There is nothing to power on without a server.
	if obj.server == nil {
		return fmt.Errorf("server is unavailable")
	}

	_, _, err := obj.client.Server.Poweron(ctx, obj.server)
	if err != nil {
		return errwrap.Wrapf(err, "client.Server.Poweron failed")
	}

	// The poweron must be confirmed before returning, error otherwise.
	isRunning := obj.serverStateIs(HetznerStateRunning)
	if err := obj.waitUntil(ctx, isRunning); err != nil {
		return errwrap.Wrapf(err, "waitUntil(serverStateIs(Running)) exited early")
	}

	return nil
}
// powerServerOff asks the API to power off the server, and then blocks in
// waitUntil until the "off" state is reported. It returns an error if the
// server is absent, if the poweroff request fails, or if waitUntil exits early
// due to timeout, context cancellation or another error.
func (obj *HetznerVMRes) powerServerOff(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("powerServerOff()")
	}

	// There is nothing to power off without a server.
	if obj.server == nil {
		return fmt.Errorf("server is unavailable")
	}

	_, _, err := obj.client.Server.Poweroff(ctx, obj.server)
	if err != nil {
		return errwrap.Wrapf(err, "client.Server.Poweroff failed")
	}

	// The poweroff must be confirmed before returning, error otherwise.
	isOff := obj.serverStateIs(HetznerStateOff)
	if err := obj.waitUntil(ctx, isOff); err != nil {
		return errwrap.Wrapf(err, "waitUntil(serverStateIs(Off)) exited early")
	}

	return nil
}
// createServer refuses to run if a server is already known locally. Otherwise
// it builds the serverconfig via getServerConfig, requests the creation of a
// server with that configuration, and blocks in waitUntil until the server is
// reported to exist. A warning is logged if the serverconfig holds no SSH keys.
// Errors occur when the server exists already, the config can not be built,
// the client fails, or the wait step exits early due to context cancellation,
// client failure or timeout.
// NOTE: the startAfterCreate option is used to reach "running" state faster for
// two cases. When the state is specified as "running", or when the state is ""
// (undefined) and the last observed serverstatus was "running" or "starting".
func (obj *HetznerVMRes) createServer(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("createServer()")
	}

	// Never create a second server on top of an existing one.
	if obj.server != nil {
		return fmt.Errorf("server already exists")
	}

	err := obj.getServerConfig(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "getServerConfig failed")
	}
	if obj.serverconfig.SSHKeys == nil {
		obj.init.Logf("warning: no ssh keys registered for server creation")
	}

	_, _, err = obj.client.Server.Create(ctx, obj.serverconfig)
	if err != nil {
		return errwrap.Wrapf(err, "client.server.create failed")
	}

	// The creation must be confirmed before returning.
	exists := obj.serverStateIs(HetznerStateExists)
	if err := obj.waitUntil(ctx, exists); err != nil {
		return errwrap.Wrapf(err, "waitUntil(serverExists) exited early")
	}

	return nil
}
// deleteServer requests the deletion of the currently known server and waits
// until its absence is confirmed. It returns an error if no server is known,
// if the delete request fails, or if the wait step exits early.
// NOTE: this function does not consult AllowRebuild or any other safeguard
// itself, so a direct call always attempts the deletion. Use with caution.
func (obj *HetznerVMRes) deleteServer(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("deleteServer()")
	}
	// There is nothing to delete without a known server.
	if obj.server == nil {
		return fmt.Errorf("server is already unavailable")
	}
	_, err := obj.client.Server.Delete(ctx, obj.server)
	if err != nil {
		return errwrap.Wrapf(err, "client.server.delete failed")
	}
	// Block until the server is observed to be gone.
	isAbsent := obj.serverStateIs(HetznerStateAbsent)
	err = obj.waitUntil(ctx, isAbsent)
	if err != nil {
		return errwrap.Wrapf(err, "waitUntil(serverStateIs(Absent)) exited early")
	}
	return nil
}
// rebuildServer replaces the current server instance: it deletes the known
// server and then creates a new one via createServer. If no server is known,
// it returns nil when the state is "" (undefined), and an error for any other
// state. If the state is "" (undefined) and a server exists, the status of
// that server is stored in lastObservedState before the deletion, so that it
// is available when the new serverconfig is built.
// NOTE: AllowRebuild protects the user against unexpected server deletions:
// HetznerAllowRebuildError blocks the rebuild with an error,
// HetznerAllowRebuildIgnore blocks it without an error, and any other value
// lets the rebuild proceed.
func (obj *HetznerVMRes) rebuildServer(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("rebuildServer()")
	}
	// Without a server there is nothing to rebuild. This is acceptable for
	// an undefined state, and an error for every other state.
	if obj.server == nil && obj.State == HetznerStateUndefined {
		return nil
	}
	if obj.server == nil {
		return fmt.Errorf("server is unavailable")
	}
	// Respect the rebuild policy before touching the live server.
	switch obj.AllowRebuild {
	case HetznerAllowRebuildError:
		// changes are not applied, and the caller is told why
		return fmt.Errorf("server rebuild blocked, requires deletion")
	case HetznerAllowRebuildIgnore:
		// changes are not applied, but this is not reported as an error
		return nil
	}
	// Remember the live status for an undefined state, before the server
	// it belongs to is deleted.
	if obj.State == HetznerStateUndefined {
		obj.lastObservedState = obj.server.Status
	}
	// Delete first, then create the replacement.
	err := obj.deleteServer(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "deleteServer failed")
	}
	err = obj.createServer(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "createServer failed")
	}
	return nil
}
// getServerConfig looks up the server type, image, datacenter and SSH keys
// with the client, and stores the resulting hcloud.ServerCreateOpts in the
// private serverconfig field. It returns an error if a client request fails,
// if the server type, image or datacenter name is unknown, or if the resulting
// serverconfig does not pass validation by hcloud-go.
// NOTE: the startAfterCreate option is enabled in two cases. When the state is
// specified as "running", or when the state is "" (undefined) and the last
// observed serverstatus was "running" or "starting".
// TODO: add option to define Location xor Datacenter (never both!).
func (obj *HetznerVMRes) getServerConfig(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("getServerConfig()")
	}
	// Volumes are not supported (yet), so automount stays disabled.
	// TODO: add support for volume selection?
	automount := false
	// Decide whether the new server is powered on right after creation.
	startAfterCreate := false
	switch obj.State {
	case HetznerStateRunning:
		startAfterCreate = true
	case HetznerStateUndefined:
		last := obj.lastObservedState
		startAfterCreate = last == hcloud.ServerStatusRunning || last == hcloud.ServerStatusStarting
	}
	// Collect the serverconfig elements from the API.
	serverType, _, err := obj.client.ServerType.GetByName(ctx, obj.ServerType)
	if err != nil {
		return errwrap.Wrapf(err, "failed to collect ServerType struct")
	}
	image, _, err := obj.client.Image.GetByName(ctx, obj.Image)
	if err != nil {
		return errwrap.Wrapf(err, "failed to collect Image struct")
	}
	datacenter, _, err := obj.client.Datacenter.GetByName(ctx, obj.Datacenter)
	if err != nil {
		return errwrap.Wrapf(err, "failed to collect Datacenter struct")
	}
	// TODO: add more flexible key selection
	sshKeys, err := obj.client.SSHKey.All(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "failed to collect SSHKey array")
	}
	// NOTE: GetByName will return nil in case the given name is unknown.
	switch {
	case serverType == nil:
		return fmt.Errorf("unknown servertype: %s", obj.ServerType)
	case image == nil:
		return fmt.Errorf("unknown image: %s", obj.Image)
	case datacenter == nil:
		return fmt.Errorf("unknown datacenter: %s", obj.Datacenter)
	}
	// Build the serverconfig from the given specs. Every field that is not
	// listed here (Location, Labels, Volumes, Networks, Firewalls and
	// PlacementGroup) is left at its zero value.
	obj.serverconfig = hcloud.ServerCreateOpts{
		Name:             obj.Name(),
		ServerType:       serverType,
		Image:            image,
		SSHKeys:          sshKeys,
		Datacenter:       datacenter,
		UserData:         obj.UserData,
		StartAfterCreate: &startAfterCreate,
		Automount:        &automount,
	}
	// hcloud-go provides basic validation, but this can still miss problems!
	// TODO: add tests? If issues come up, add checks to Validate.
	if err := obj.serverconfig.Validate(); err != nil {
		return errwrap.Wrapf(err, "invalid serverconfig")
	}
	return nil
}
// enableRescueMode enables rescue mode for the known server and waits until
// the change is confirmed. It returns nil right away if rescue mode is already
// enabled. It returns an error if no server is known, if the server is not in
// a steady state, if collecting the rescue keys fails, if the API request
// fails, or if the wait step exits early.
// NOTE: the EnableRescue request requires steady state ("off" or "running").
func (obj *HetznerVMRes) enableRescueMode(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("enableRescueMode()")
	}
	// Rescue mode can not be enabled without a known server.
	if obj.server == nil {
		return fmt.Errorf("server is unavailable")
	}
	// Nothing to do if rescue mode is enabled already.
	if obj.server.RescueEnabled {
		return nil
	}
	// Only proceed from a steady state ("running" or "off").
	// NOTE: otherwise the "server is locked" when trying to enable.
	steady, err := obj.serverInSteadyState()
	if err != nil {
		return errwrap.Wrapf(err, "serverInSteadyState failed")
	}
	if !steady {
		return fmt.Errorf("state must be 'running' or 'off' (now: %s)", obj.server.Status)
	}
	// Store rescueImage and rescueKeys on the resource before the request.
	// NOTE: rescueImage and rescueKeys also provide a checkapply reference.
	obj.rescueImage = hcloud.ServerRescueType(obj.ServerRescueMode)
	err = obj.getRescueKeys(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "getRescueKeys failed")
	}
	// NOTE: EnableRescue returns a root password, but this is ignored in favor
	// of connecting to the server in rescue mode over SSH.
	// TODO: add support for password login? SSH usually ok.
	_, _, err = obj.client.Server.EnableRescue(ctx, obj.server, hcloud.ServerEnableRescueOpts{
		Type:    obj.rescueImage,
		SSHKeys: obj.rescueKeys,
	})
	if err != nil {
		return errwrap.Wrapf(err, "client.Server.EnableRescue failed")
	}
	// Block until the enabled rescue mode is observed.
	err = obj.waitUntil(ctx, obj.rescueModeEnabled)
	if err != nil {
		return errwrap.Wrapf(err, "waitUntil(rescueModeEnabled) exited early")
	}
	return nil
}
// disableRescueMode tries to disable rescue mode for the specified server, then
// waits until the operation is confirmed. It returns early without error if no
// server is known or if rescue mode is already disabled. It returns an error if
// the server is not in a steady state, if the API request fails, or if
// waitUntil exits early (e.g. due to timeout or context cancellation).
// NOTE: an absent server is treated as a disabled serverrescuemode.
// NOTE: the DisableRescue request requires steady state ("off" or "running").
func (obj *HetznerVMRes) disableRescueMode(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("disableRescueMode()")
	}
	// Exit immediately if rescue mode is already disabled. An absent server
	// counts as disabled.
	if obj.server == nil {
		return nil
	}
	if !obj.server.RescueEnabled {
		return nil
	}
	// Exit if the server is not in a steady state ("running" or "off").
	// NOTE: otherwise the "server is locked" when trying to disable.
	stateOK, err := obj.serverInSteadyState()
	if err != nil {
		return errwrap.Wrapf(err, "serverInSteadyState failed")
	}
	if !stateOK {
		return fmt.Errorf("state must be 'running' or 'off' (now: %s)", obj.server.Status)
	}
	// Disable rescue mode. The error message names the request that was
	// actually made, so that failures are not mistaken for enable errors.
	if _, _, err := obj.client.Server.DisableRescue(ctx, obj.server); err != nil {
		return errwrap.Wrapf(err, "client.Server.DisableRescue failed")
	}
	// Wait until the rescue disable is confirmed.
	if err := obj.waitUntil(ctx, obj.rescueModeDisabled); err != nil {
		return errwrap.Wrapf(err, "waitUntil(rescueModeDisabled) exited early")
	}
	return nil
}
// getRescueKeys resolves every key name listed in ServerRescueSSHKeys to its
// SSH key struct with the Hetzner client, and stores the resulting list in
// the private field rescueKeys. It returns an error if a lookup fails or if a
// key name is unknown; in that case rescueKeys is left untouched.
// TODO: standardize this so that it can also be used for serverconfig keys.
func (obj *HetznerVMRes) getRescueKeys(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("getRescueKeys()")
	}
	var resolved []*hcloud.SSHKey
	for i := range obj.ServerRescueSSHKeys {
		name := obj.ServerRescueSSHKeys[i]
		sshKey, _, err := obj.client.SSHKey.GetByName(ctx, name)
		if err != nil {
			return errwrap.Wrapf(err, "SSHKey GetByName(%s) failed", name)
		}
		// A nil key without an error is reported as an unknown key name.
		if sshKey == nil {
			return fmt.Errorf("unknown keyname: %s", name)
		}
		if obj.init.Debug {
			obj.init.Logf("appending known key: %s", name)
		}
		resolved = append(resolved, sshKey)
	}
	// Only store the list once every key was resolved successfully.
	obj.rescueKeys = resolved
	return nil
}
// waitUntil provides a general function that waits until the provided exit
// condition is satisfied. On every iteration it first refreshes the server
// info with getServerUpdate and then evaluates the condition. It retries every
// WaitInterval seconds until the condition is satisfied. It exits early with
// an error in case the WaitTimeout (in seconds) is reached, the context is
// cancelled, the server update fails or the condition itself returns an error.
// Otherwise it returns nil once the condition is satisfied. The exit condition
// must check a well-defined condition for the resource, and return true if
// satisfied, false otherwise. The condition must check its logic without API
// requests, so no context is needed.
// NOTE: the condition is always checked at least once, even if WaitTimeout is
// zero, because the timeout is only evaluated after a failed check.
func (obj *HetznerVMRes) waitUntil(ctx context.Context, condition func() (bool, error)) error {
	if obj.init.Debug {
		obj.init.Logf("waitUntil()")
	}
	// Use explicit timers instead of time.After, so that they can be stopped
	// as soon as this function returns, instead of lingering until they fire.
	timeout := time.NewTimer(time.Duration(obj.WaitTimeout) * time.Second)
	defer timeout.Stop()
	for {
		// Get up-to-date serverinfo.
		if err := obj.getServerUpdate(ctx); err != nil {
			return errwrap.Wrapf(err, "failed serverupdate request")
		}
		// Check if the provided exit condition is satisfied.
		conditionSatisfied, err := condition()
		if err != nil {
			return errwrap.Wrapf(err, "failed to confirm exit condition")
		}
		if conditionSatisfied {
			return nil
		}
		// Retry every WaitInterval until the exit condition is satisfied.
		// Can exit early by timeout, context cancellation or an error.
		retry := time.NewTimer(time.Duration(obj.WaitInterval) * time.Second)
		select {
		case <-retry.C:
			// retry confirmation
		case <-timeout.C:
			retry.Stop()
			return fmt.Errorf("timeout: exit condition not confirmed after %d seconds", obj.WaitTimeout)
		case <-ctx.Done():
			retry.Stop()
			return errwrap.Wrapf(ctx.Err(), "wait interrupted by context")
		}
	}
}
// serverStateConverged reports whether the known server matches the desired
// State. An undefined state "" is always converged. Without a known server,
// only "absent" is converged. With a known server, "exists" is converged,
// "absent" is not, and "running" and "off" are converged when the server
// status matches. Any other state results in an error, but only if a server
// is known.
func (obj *HetznerVMRes) serverStateConverged() (converged bool, err error) {
	if obj.init.Debug {
		obj.init.Logf("serverStateConverged()")
	}
	switch {
	case obj.State == HetznerStateUndefined:
		// nothing was requested, so nothing can be out of sync
		return true, nil
	case obj.server == nil:
		// no server: only converged if absence was requested
		return obj.State == HetznerStateAbsent, nil
	}
	// From here on the server exists.
	switch obj.State {
	case HetznerStateAbsent:
		return false, nil
	case HetznerStateExists:
		return true, nil
	case HetznerStateRunning:
		return obj.server.Status == hcloud.ServerStatusRunning, nil
	case HetznerStateOff:
		return obj.server.Status == hcloud.ServerStatusOff, nil
	}
	return false, fmt.Errorf("invalid state: %s", obj.State)
}
// serverInSteadyState reports whether the known server has status "running"
// or "off". It returns false for every other status, and also when no server
// is known. The returned error is always nil.
func (obj *HetznerVMRes) serverInSteadyState() (steady bool, err error) {
	if obj.init.Debug {
		obj.init.Logf("serverInSteadyState()")
	}
	// An absent server is not in a steady state.
	if obj.server == nil {
		return false, nil
	}
	status := obj.server.Status
	steady = status == hcloud.ServerStatusRunning || status == hcloud.ServerStatusOff
	return steady, nil
}
// rescueModeEnabled reports whether a server is known and has rescue mode
// enabled. The returned error is always nil; the signature matches the
// condition function expected by waitUntil.
func (obj *HetznerVMRes) rescueModeEnabled() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("rescueModeEnabled()")
	}
	// An absent server never counts as enabled.
	enabled := obj.server != nil && obj.server.RescueEnabled
	return enabled, nil
}
// rescueModeDisabled reports whether rescue mode is disabled. Server absence
// is also considered to "disable" rescue mode, and returns true. The returned
// error is always nil; the signature matches the condition function expected
// by waitUntil.
func (obj *HetznerVMRes) rescueModeDisabled() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("rescueModeDisabled()")
	}
	// Disabled means: either no server at all, or a server without rescue.
	disabled := obj.server == nil || !obj.server.RescueEnabled
	return disabled, nil
}
// rescueModeConverged reports whether the rescue mode of the known server
// matches ServerRescueMode. Without a known server, only a disabled rescue
// mode is converged. With a known server, a disabled mode is converged when
// rescue is not enabled, and one of the rescue types is converged when rescue
// is enabled and the stored rescueImage matches the requested type. An error
// is returned for any other ServerRescueMode value, but only if a server is
// known.
// TODO: review checks for image and ssh keys.
func (obj *HetznerVMRes) rescueModeConverged() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("rescueModeConverged()")
	}
	wanted := obj.ServerRescueMode
	// Absence only matches a disabled rescue mode.
	if obj.server == nil {
		return wanted == HetznerServerRescueDisabled, nil
	}
	enabled := obj.server.RescueEnabled
	switch wanted {
	case HetznerServerRescueDisabled:
		// converged if disabled as intended
		return !enabled, nil
	case HetznerServerRescueTypeLinux32, HetznerServerRescueTypeLinux64, HetznerServerRescueTypeFreeBSD64:
		// must be enabled as intended
		if !enabled {
			return false, nil
		}
		// the last used image type must match the specs
		// TODO: reference logic needs review
		// TODO: compare rescueKeys with ServerRescueSSHKeys?
		return obj.rescueImage == hcloud.ServerRescueType(wanted), nil
	}
	return false, fmt.Errorf("invalid ServerRescueMode: %s", wanted)
}
// serverStateIs returns a condition function that can be used with waitUntil.
// When that function is called, it returns true if the known server matches
// the state given as input argument, false otherwise. The supported states are
// "absent", "exists", "running", "off" and "" (undefined, always true). For
// any other input, the condition returns false without error while no server
// is known, and an error once a server is known.
// NOTE: "exists" is satisfied by any known server, whatever its status. This
// includes hcloud states like ServerStatusUnknown and ServerStatusDeleting,
// which is important to take into account when rewriting or adjusting any
// logic using this function.
// NOTE: the debug message is logged when the condition is built, not each
// time it is evaluated.
func (obj *HetznerVMRes) serverStateIs(state string) func() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("serverStateIs(%s)", state)
	}
	return func() (bool, error) {
		// An undefined state is satisfied by anything.
		if state == HetznerStateUndefined {
			return true, nil
		}
		// Without a server, only "absent" is satisfied.
		if obj.server == nil {
			return state == HetznerStateAbsent, nil
		}
		// The server exists, but in the right state?
		switch state {
		case HetznerStateAbsent:
			return false, nil
		case HetznerStateExists:
			return true, nil
		case HetznerStateRunning:
			return obj.server.Status == hcloud.ServerStatusRunning, nil
		case HetznerStateOff:
			return obj.server.Status == hcloud.ServerStatusOff, nil
		}
		return false, fmt.Errorf("unsupported state: %s", state)
	}
}