Files
mgmt/engine/resources/hetzner_vm.go
James Shubin d30ff6cfae legal: Remove year
Instead of constantly making these updates, let's just remove the year
since things are stored in git anyways, and this is not an actual modern
legal risk anymore.
2025-01-26 16:24:51 -05:00

1240 lines
50 KiB
Go

// Mgmt
// Copyright (C) James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// Additional permission under GNU GPL version 3 section 7
//
// If you modify this program, or any covered work, by linking or combining it
// with embedded mcl code and modules (and that the embedded mcl code and
// modules which link with this program, contain a copy of their source code in
// the authoritative form) containing parts covered by the terms of any other
// license, the licensors of this program grant you additional permission to
// convey the resulting work. Furthermore, the licensors of this program grant
// the original author, James Shubin, additional permission to update this
// additional permission if he deems it necessary to achieve the goals of this
// additional permission.
package resources
import (
"context"
"fmt"
"time"
"github.com/purpleidea/mgmt/engine"
"github.com/purpleidea/mgmt/engine/traits"
"github.com/purpleidea/mgmt/util/errwrap"
"github.com/hetznercloud/hcloud-go/hcloud"
)
// Accepted values for the "state" param of a hetzner:vm resource.
const (
	// HetznerStateUndefined is the default: the state is left undefined and
	// is always treated as converged. Changes to other params are only
	// applied when the server is in a state that is compatible with the
	// operations needed to make that change.
	HetznerStateUndefined = ""

	// HetznerStateExists requires the server to exist, without caring whether
	// it is "off", "running" or in a transient state. An absent server is
	// created in "off" state, with one exception: if the last observed state
	// before a rebuild was "running" or "starting", rebuildServer will set
	// the new server to "running".
	HetznerStateExists = "exists"

	// HetznerStateRunning requires the server to be powered on. An absent
	// server is created in "running" state.
	HetznerStateRunning = "running"

	// HetznerStateOff requires the server to be powered off. An absent server
	// is created in "off" state.
	HetznerStateOff = "off"

	// HetznerStateAbsent requires the server to be deleted/absent. An
	// existing server is deleted. Note that this deletion is always executed
	// if the "absent" state is explicitly specified!
	HetznerStateAbsent = "absent"
)

// Accepted values for the "allowrebuild" param of a hetzner:vm resource.
const (
	// HetznerAllowRebuildError is the default: server rebuild requests in
	// CheckApply are blocked and reported as an error. Rebuild requests occur
	// when other resource params require a destructive rebuild to reach
	// convergence. Erroring by default prevents unexpected server deletions.
	HetznerAllowRebuildError = ""

	// HetznerAllowRebuildIgnore blocks server rebuild requests in CheckApply
	// without raising an error. CheckApply must skip the rebuild and continue
	// with further steps if possible. Use this option to prevent unexpected
	// server deletions, without disrupting the mcl script.
	HetznerAllowRebuildIgnore = "ignore"

	// HetznerAllowRebuildIfNeeded permits server rebuilds within CheckApply.
	// This is needed when the specified serverspecs are not (yet) aligned
	// with the active instance. Use this option only if you are sure that you
	// are not destroying any critical data or services!
	HetznerAllowRebuildIfNeeded = "ifneeded"
)

// Accepted values for the "serverrescuemode" param of a hetzner:vm resource.
const (
	// HetznerServerRescueDisabled is the default: rescue mode is disabled.
	HetznerServerRescueDisabled = ""

	// HetznerServerRescueTypeLinux32 enables rescue mode with a linux32 image
	// type.
	HetznerServerRescueTypeLinux32 = "linux32"

	// HetznerServerRescueTypeLinux64 enables rescue mode with a linux64 image
	// type.
	HetznerServerRescueTypeLinux64 = "linux64"

	// HetznerServerRescueTypeFreeBSD64 enables rescue mode with a freebsd64
	// image type.
	HetznerServerRescueTypeFreeBSD64 = "freebsd64"
)

// Timing limits and defaults, all expressed in seconds.
const (
	// HetznerPollLimit is the lower limit on the polling interval in seconds.
	// Since the Hetzner API supports requests at up to 3600 requests per
	// hour, this limit is set to prevent rate limit errors in long term
	// operation.
	// NOTE: polling the same Hetzner project from multiple clients requires a
	// larger polling interval to prevent the same rate limit error, since
	// these requests all add to the query count of their shared project. It
	// is recommended to use a polling interval of at least N seconds, with N
	// the number of active hetzner:vm instances of the same project.
	// NOTE: high rates of change to other params require additional API
	// queries at CheckApply. Increase the polling interval again to prevent
	// rate limit errors if frequent updates are expected.
	HetznerPollLimit = 1

	// HetznerWaitIntervalLimit is the lower limit on wait intervals in
	// seconds. High request rates are allowed, but risk causing rate limit
	// errors.
	HetznerWaitIntervalLimit = 0

	// HetznerWaitIntervalDefault is the default wait interval in seconds.
	// NOTE: use larger intervals when using many resources under the same
	// Hetzner project, or when expecting consistently high rates of change to
	// other resource parameters.
	HetznerWaitIntervalDefault = 5

	// HetznerWaitTimeoutDefault is the default timeout limit in seconds
	// (five minutes).
	HetznerWaitTimeoutDefault = 5 * 60
)
// init registers the "hetzner:vm" resource kind with the engine, using a
// constructor that returns an empty HetznerVMRes.
func init() {
	newRes := func() engine.Res {
		return &HetznerVMRes{}
	}
	engine.RegisterResource("hetzner:vm", newRes)
}
// HetznerVMRes is a Hetzner cloud resource (1). It connects with the cloud API
// using the hcloud-go package provided by Hetzner. The API token for a new
// project must be generated manually, via the cloud console (2), before this
// resource can establish a connection with the API. One Hetzner resource
// represents one server instance, and multiple instances can be registered
// under the same project. A resource in the "absent" state only exists as a
// local mcl struct, and does not exist as a server instance on Hetzner's side.
// NOTE: the Hetzner cloud console must be used to create a new project,
// generate the corresponding API token, and initialize the desired SSH keys.
// All registered SSH keys are used when creating a server, and a subset of
// those can be enabled for rescue mode via the "serverrescuekeys" param.
// NOTE: complete and up-to-date serverconfig options must be requested from the
// Hetzner API, but hcloud-go-getopts (3) provides a static reference.
// NOTE: this resource requires polling, via the "Meta:poll" param. The Hetzner
// API imposes a maximum rate of 3600 requests per hour that must be taken into
// account for intensive and/or long term operations. When running N hetzner:vm
// resources under the same Hetzner project, it is recommended to use a polling
// interval of at least N seconds. High rates of change to other params will
// require additional API requests at CheckApply. When frequent param updates
// are expected for long term operations, it is recommended to increase the
// polling interval again to prevent rate limit errors.
// NOTE: running multiple concurrent mcl scripts on the same resource might
// cause unexpected behavior in the API or the resource state. Use with care.
// TODO: build tests for hetzner:vm? But hcloud-go has no mocking package.
// 1) https://docs.hetzner.cloud/
// 2) https://console.hetzner.cloud/
// 3) https://github.com/jefmasereel/hcloud-go-getopts
type HetznerVMRes struct {
traits.Base
// init holds the engine handle that is saved by Init. It is used for
// logging and for reading the program name, version and debug flag.
init *engine.Init
// APIToken specifies the unique API token corresponding to a Hetzner
// project. Keep this token private! It provides full access to this
// project, so a leaked token will be vulnerable to abuse. Read it from
// a local file or the mgmt deploy, or provide it directly as a string.
// NOTE: It must be generated manually via https://console.hetzner.cloud/.
// NOTE: This token is usually a 64 character alphanumeric string.
APIToken string `lang:"apitoken"`
// State specifies the desired state of the server instance. The supported
// options are "" (undefined), "absent", "exists", "off" and "running".
// HetznerStateUndefined ("") leaves the state undefined by default.
// HetznerStateExists ("exists") indicates that the server must exist.
// HetznerStateAbsent ("absent") indicates that the server must not exist.
// HetznerStateRunning ("running") tells the server it must be powered on.
// HetznerStateOff ("off") tells the server it must be powered off.
// NOTE: any other inputs will not pass Validate and result in an error.
// NOTE: setting the state of a live server to "absent" will delete all data
// and services that are located on that instance! Use with caution.
State string `lang:"state"`
// AllowRebuild provides flexible protection against unexpected server
// rebuilds. Any changes to the "servertype", "datacenter" or "image" params
// require a destructive rebuild, which deletes all data on that server.
// The user must explicitly allow these operations with AllowRebuild.
// Choose from three options: "ifneeded" allows all rebuilds that are needed
// by CheckApply to meet the specified params. "ignore" disables these
// rebuilds, but continues without error. The default option ("") disables
// these rebuilds and returns an error when CheckApply requests one.
// NOTE: Soft updates related to power and rescue mode are always allowed,
// because they are only required for explicit changes to resource fields.
// TODO: add AllowReboot if any indirect poweroffs are ever implemented.
AllowRebuild string `lang:"allowrebuild"`
// ServerType determines the machine type as defined by Hetzner. A complete
// and up-to-date list of options must be requested from the Hetzner API,
// but hcloud-go-getopts (url) provides a static reference. Basic servertype
// options include "cx11", "cx21", "cx31" etc.
// NOTE: make sure to check the price of the selected servertype! The listed
// examples are usually very cheap, but never free. Price and availability
// can also be dependent on the selected datacenter.
// https://github.com/JefMasereel/hcloud-go-getopts/
// TODO: set some kind of cost-based protection policy?
ServerType string `lang:"servertype"`
// Datacenter determines where the resource is hosted. A complete and
// up-to-date list of options must be requested from the Hetzner API, but
// hcloud-go-getopts (url) provides a static reference. The datacenter
// options include "nbg1-dc3", "fsn1-dc14", "hel1-dc2" etc.
// https://github.com/JefMasereel/hcloud-go-getopts/
Datacenter string `lang:"datacenter"`
// Image determines the operating system to be installed. A complete and
// up-to-date list of options must be requested from the Hetzner API, but
// hcloud-go-getopts (url) provides a static reference. The image type
// options include "centos-7", "ubuntu-18.04", "debian-10" etc.
// https://github.com/JefMasereel/hcloud-go-getopts/
Image string `lang:"image"`
// UserData can be used to run commands on the server instance at creation.
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html.
UserData string `lang:"userdata"`
// ServerRescueMode specifies the image type used when enabling rescue mode.
// The supported image types are "linux32", "linux64" and "freebsd64".
// Alternatively, leave this string empty to disable rescue mode (default).
// Other input values will not pass Validate and result in an error.
// NOTE: rescue mode can not be enabled if the server is absent.
// NOTE: Rescue mode can be used to log into the server over SSH and access
// the disks when the normal OS has trouble booting on its own.
ServerRescueMode string `lang:"serverrescuemode"`
// ServerRescueSSHKeys can be used to select a subset of keys that should be
// enabled for rescue mode operations over SSH. From all SSH keys known to
// the project client, choose a subset of keys by name, as an array of
// strings. New keys must first be added manually via the cloud console.
// An error is thrown if a given keyname is not recognized by the client.
// NOTE: live changes to this keylist while rescue mode is already enabled
// are not (yet) detected or applied by CheckApply.
// TODO: improve ssh key handling at checkApplyRescueMode and serverRebuild.
ServerRescueSSHKeys []string `lang:"serverrescuekeys"`
// WaitInterval is the interval in seconds that is used when waiting for
// transient states to converge between intermediate operations. A zero
// value causes the waiter to run without delays (burst requests). Although
// such burst requests are allowed, it is recommended to use a wait interval
// that keeps the total request rate under 3600 requests per hour. Take
// these factors into account: polling rate "Meta:poll", number of active
// resources under the same Hetzner project, and the expected rate of param
// updates. This will help to prevent rate limit errors.
WaitInterval uint32 `lang:"waitinterval"`
// WaitTimeout will cancel wait loops if they do not exit cleanly before
// the expected time in seconds, in order to detect defective loops and
// avoid unnecessary consumption of computational resources.
WaitTimeout uint32 `lang:"waittimeout"`
// client is required for hcloud-go to interact with the Hetzner API. It is
// created in Init and reset to nil in Cleanup.
client *hcloud.Client
// server is a local copy of the server object returned by hcloud-go. If
// this is nil, the server is considered to be absent. Otherwise, this
// struct describes the properties of the server instance as registered with
// Hetzner at the time of the update request.
server *hcloud.Server
// serverconfig is a local copy of the serverCreateOpts struct generated
// with hcloud-go. This struct is dependent on the ServerType, Datacenter,
// Image and State params. These must be chosen from the valid options
// provided by Hetzner, see details on https://docs.hetzner.cloud/.
serverconfig hcloud.ServerCreateOpts
// lastObservedState is a local copy of the last observed state of the
// resource. This is used to determine the startAfterCreate option during
// server rebuilds when the state is "" (undefined).
lastObservedState hcloud.ServerStatus
// rescueKeys is a local copy of the array of SSH key values to be enabled
// in rescue mode, after formatting for direct use with hcloud-go.
rescueKeys []*hcloud.SSHKey
// rescueImage is a local copy of the image type used when rescue mode was
// enabled the last time, to give checkApplyRescueMode a static reference.
rescueImage hcloud.ServerRescueType
}
// Default returns a new resource populated with conservative defaults: an
// undefined state, rebuilds blocked with an error, and the default wait
// interval and wait timeout.
func (obj *HetznerVMRes) Default() engine.Res {
	res := &HetznerVMRes{}
	res.State = HetznerStateUndefined
	res.AllowRebuild = HetznerAllowRebuildError
	res.WaitInterval = HetznerWaitIntervalDefault
	res.WaitTimeout = HetznerWaitTimeoutDefault
	return res
}
// Validate checks the user supplied params and returns an error describing the
// first invalid one. It rejects an empty API token, unknown values for the
// state, allowrebuild and serverrescuemode params, and poll or wait intervals
// that are below their respective lower limits.
func (obj *HetznerVMRes) Validate() error {
	// oneOf reports whether s is equal to any of the accepted values.
	oneOf := func(s string, accepted ...string) bool {
		for _, a := range accepted {
			if s == a {
				return true
			}
		}
		return false
	}

	// The token is required to build the API client.
	if obj.APIToken == "" {
		return fmt.Errorf("empty token string")
	}

	// State: undefined (default), exists, or one of the steady states.
	if !oneOf(obj.State,
		HetznerStateUndefined,
		HetznerStateExists,
		HetznerStateRunning,
		HetznerStateOff,
		HetznerStateAbsent,
	) {
		return fmt.Errorf("invalid state: %s", obj.State)
	}

	// AllowRebuild: error (default), ignore or ifneeded.
	if !oneOf(obj.AllowRebuild,
		HetznerAllowRebuildError,
		HetznerAllowRebuildIgnore,
		HetznerAllowRebuildIfNeeded,
	) {
		return fmt.Errorf("invalid allowrebuild: %s", obj.AllowRebuild)
	}

	// ServerRescueMode: disabled (default) or one of the rescue image types.
	if !oneOf(obj.ServerRescueMode,
		HetznerServerRescueDisabled,
		HetznerServerRescueTypeLinux32,
		HetznerServerRescueTypeLinux64,
		HetznerServerRescueTypeFreeBSD64,
	) {
		return fmt.Errorf("invalid serverrescuemode: %s", obj.ServerRescueMode)
	}

	// Timing params must respect their lower limits.
	if poll := obj.MetaParams().Poll; poll < HetznerPollLimit {
		return fmt.Errorf("invalid polling interval (minimum %d s)", HetznerPollLimit)
	}
	if interval := obj.WaitInterval; interval < HetznerWaitIntervalLimit {
		return fmt.Errorf("invalid wait interval (minimum %d)", HetznerWaitIntervalLimit)
	}

	return nil
}
// Init stores the engine handle, builds the hcloud-go client from the API
// token, and logs a set of warnings about the configured rebuild policy, the
// late validation of serverconfig options, and the timing limits.
func (obj *HetznerVMRes) Init(init *engine.Init) error {
	obj.init = init // keep the handle for logging and metadata

	// Build the API client, identifying ourselves by program and version.
	// TODO: hcloud.WithEndpoint(),
	// TODO: hcloud.WithDebugWriter(),
	tokenOpt := hcloud.WithToken(obj.APIToken)
	appOpt := hcloud.WithApplication(obj.init.Program, obj.init.Version)
	obj.client = hcloud.NewClient(tokenOpt, appOpt)

	// Tell the user how rebuild requests are going to be handled.
	policy := obj.AllowRebuild
	switch {
	case policy == HetznerAllowRebuildError:
		obj.init.Logf("warning: server rebuild requests will be blocked with error")
	case policy == HetznerAllowRebuildIgnore:
		obj.init.Logf("warning: server rebuild requests will be skipped without error")
	case policy == HetznerAllowRebuildIfNeeded:
		obj.init.Logf("warning: server rebuild requests will be applied without error")
	}

	// The serverconfig options can't be checked before talking to the API.
	obj.init.Logf("warning: serverconfig options will only be validated during checkapply")

	// Remind the user of the timing requirements.
	obj.init.Logf("warning: Meta:poll must always be greater or equal than %d seconds", HetznerPollLimit)
	obj.init.Logf("warning: waitinterval must always be greater or equal than %d seconds", HetznerWaitIntervalLimit)

	return nil
}
// Cleanup is run by the engine to clean up after the resource is done. It
// drops the API client and wipes the stored API token, so that no
// authentication info is kept on the struct. It always returns nil.
func (obj *HetznerVMRes) Cleanup() error {
	obj.client = nil
	obj.APIToken = ""
	return nil
}
// Watch is not implemented for this resource, because the Hetzner API does not
// provide any event streams. It always returns an error; use the poll
// metaparam instead.
// NOTE: HetznerPollLimit sets an explicit minimum on the polling interval.
func (obj *HetznerVMRes) Watch(context.Context) error {
	err := fmt.Errorf("invalid Watch call: requires poll metaparam")
	return err
}
// CheckApply inspects the live server and determines what needs to happen for
// the HetznerVM resource to converge. Changes are only applied when apply is
// true. It returns false whenever the resource required changes (regardless of
// the apply value) and true otherwise. Errors from the individual steps are
// wrapped and returned.
// The steps run in this order: refresh the server info, converge the server
// state, stop early if the state is not converged, rebuild the server if the
// specs differ, and finally converge the rescue mode.
// NOTE: all functions that push changes to the Hetzner instance run a waitUntil
// call with the appropriate exit condition before returning, such that the
// requested operation is confirmed before continuing. This ensures that the
// "server" struct always contains up-to-date info of the live instance.
// NOTE: this last assumption might still fail in case the same resource
// instance is managed by multiple running mgmt instances!
// TODO: possible to ensure safe concurrency?
func (obj *HetznerVMRes) CheckApply(ctx context.Context, apply bool) (bool, error) {
	// Start from up-to-date server info, as reported by the API.
	if err := obj.getServerUpdate(ctx); err != nil {
		return false, errwrap.Wrapf(err, "getServerUpdate failed")
	}

	// Step 1: bring the server into the requested state.
	// NOTE: in case of undefined state, this always returns (true, nil).
	stateUnchanged, err := obj.checkApplyServerState(ctx, apply)
	if err != nil {
		return false, errwrap.Wrapf(err, "checkApplyServerState failed")
	}

	// Without a converged state there is no point in going any further.
	// NOTE: undefined state will pass! Further steps are applied if possible.
	stateOK, err := obj.serverStateConverged()
	if err != nil {
		return false, errwrap.Wrapf(err, "serverStateConverged failed")
	}
	if !stateOK {
		return false, nil
	}

	// Step 2: changes in cpu, location and/or image require a rebuild.
	// NOTE: these changes are only applied if the server exists.
	specsUnchanged, err := obj.checkApplyServerRebuild(ctx, apply)
	if err != nil {
		return false, errwrap.Wrapf(err, "checkApplyServerRebuild failed")
	}

	// Step 3: rescue mode can be changed without a destructive rebuild.
	// NOTE: these changes are only applied if the server is running.
	rescueUnchanged, err := obj.checkApplyRescueMode(ctx, apply)
	if err != nil {
		return false, errwrap.Wrapf(err, "checkApplyRescueMode failed")
	}

	// The resource only counts as converged if no step needed any changes.
	return stateUnchanged && specsUnchanged && rescueUnchanged, nil
}
// checkApplyServerState brings the server into the state requested by the
// State param. An undefined state returns (true, nil) immediately. Otherwise
// the server is created if it is missing, and then powered on, powered off or
// deleted as needed, once it has settled in a steady state.
// NOTE: the output arguments follow the rules of CheckApply: false is returned
// whenever changes were required (regardless of the apply value), and true
// otherwise. Any errors that might occur are wrapped and returned.
func (obj *HetznerVMRes) checkApplyServerState(ctx context.Context, apply bool) (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("checkApplyServerState(apply: %t)", apply)
	}

	// Nothing to do if the state is left undefined.
	if obj.State == HetznerStateUndefined {
		return true, nil
	}

	// created remembers whether this call had to create the server, so that
	// the result reports "changes were needed" even if the state converges.
	created := false
	if obj.server == nil {
		switch {
		case obj.State == HetznerStateAbsent:
			// The server is absent, exactly as requested.
			return true, nil
		case !apply:
			// The server should exist, but we may not create it.
			return false, nil
		}
		if err := obj.createServer(ctx); err != nil {
			return false, errwrap.Wrapf(err, "createServer failed")
		}
		created = true
	}

	// Plain existence is all that "exists" asks for.
	if obj.State == HetznerStateExists {
		return !created, nil
	}

	// The remaining states can only be judged from a steady state.
	if err := obj.waitUntil(ctx, obj.serverInSteadyState); err != nil {
		return false, errwrap.Wrapf(err, "waitUntil(serverInSteadyState) exited early")
	}

	converged, err := obj.serverStateConverged()
	if err != nil {
		return false, errwrap.Wrapf(err, "serverStateConverged failed")
	}
	if converged {
		// Converged, but a freshly created server still counts as a change.
		return !created, nil
	}

	// The server is in a steady state, but not the right one.
	if !apply {
		return false, nil
	}

	// Push the server towards the requested state.
	switch obj.State {
	case HetznerStateAbsent:
		if err := obj.deleteServer(ctx); err != nil {
			return false, errwrap.Wrapf(err, "deleteServer failed")
		}

	case HetznerStateOff:
		if err := obj.powerServerOff(ctx); err != nil {
			return false, errwrap.Wrapf(err, "powerServerOff failed")
		}

	case HetznerStateRunning:
		if err := obj.powerServerOn(ctx); err != nil {
			return false, errwrap.Wrapf(err, "powerServerOn failed")
		}

	default:
		return false, fmt.Errorf("invalid state: %s", obj.State)
	}

	// Changes were applied without error, so report "not converged before".
	return false, nil
}
// checkApplyServerRebuild compares the servertype, datacenter and image of the
// live instance with the specified params, and requests a server rebuild when
// they do not match.
// NOTE: AllowRebuild protects the user against unexpected server deletions.
// NOTE: the output arguments follow the rules of CheckApply: false is returned
// whenever changes were required (regardless of the apply value), and true
// otherwise. Any errors that might occur are wrapped and returned.
// NOTE(review): an absent server is reported as (false, nil) here, so a
// resource without a server never reports true from this step. Confirm that
// this is intended for the "absent" state.
func (obj *HetznerVMRes) checkApplyServerRebuild(ctx context.Context, apply bool) (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("checkApplyServerRebuild(apply: %t)", apply)
	}

	// There is nothing to compare or rebuild without a server.
	if obj.server == nil {
		return false, nil
	}

	// Check ServerType, Datacenter and Image against the live instance.
	match, err := obj.cmpServerSpecs()
	switch {
	case err != nil:
		return false, errwrap.Wrapf(err, "cmpServerSpecs failed")
	case match:
		return true, nil
	case !apply:
		return false, nil
	}

	// The specs differ: rebuild the server (if AllowRebuild passes).
	// NOTE: if "undefined", this tries to match the last observed state.
	err = obj.rebuildServer(ctx)
	if err != nil {
		return false, errwrap.Wrapf(err, "rebuildServer failed")
	}
	return false, nil
}
// checkApplyRescueMode makes sure that rescue mode is enabled or disabled as
// specified by ServerRescueMode. If it is not, rescue mode is first disabled,
// and then enabled again when a rescue image type is specified.
// NOTE: rescue mode changes require steady state ("off" or "running").
// NOTE: the output arguments follow the rules of CheckApply: false is returned
// whenever changes were required (regardless of the apply value), and true
// otherwise. Any errors that might occur are wrapped and returned.
// NOTE: switching image type in ServerRescueMode triggers this checkapply, but
// dynamic changes to the SSH keys are not yet supported.
// TODO: add "undefined" option for HetznerServerRescueMode? default?
// TODO: add support for rescue login via root password?
func (obj *HetznerVMRes) checkApplyRescueMode(ctx context.Context, apply bool) (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("checkApplyRescueMode(apply: %t)", apply)
	}

	wantRescue := obj.ServerRescueMode != HetznerServerRescueDisabled

	// An absent server is treated as rescue mode "disabled", so this is only
	// converged if rescue mode is not requested.
	if obj.server == nil {
		return !wantRescue, nil
	}

	// Rescue mode can only be toggled from a steady state ("running" or
	// "off"), otherwise the "server is locked".
	steady, err := obj.serverInSteadyState()
	if err != nil {
		return false, errwrap.Wrapf(err, "serverInSteadyState failed")
	}
	if !steady {
		return false, nil
	}

	// Stop here if rescue mode is already configured as intended.
	// TODO: add check for ssh keys? Only checking rescueImage.
	converged, err := obj.rescueModeConverged()
	if err != nil {
		return false, errwrap.Wrapf(err, "rescueModeConverged failed")
	}
	if converged {
		return true, nil
	}
	if !apply {
		return false, nil
	}

	// Always disable first: either to match the specs, or to be able to
	// enable rescue mode again with a new image type.
	err = obj.disableRescueMode(ctx)
	if err != nil {
		return false, errwrap.Wrapf(err, "disableRescueMode failed")
	}
	if !wantRescue {
		return false, nil
	}

	err = obj.enableRescueMode(ctx)
	if err != nil {
		return false, errwrap.Wrapf(err, "enableRescueMode failed")
	}
	return false, nil
}
// getServerUpdate asks the Hetzner API for the server that is named after this
// resource, and stores the result as the local server copy. A failed request
// is returned as a wrapped error and leaves the local copy untouched.
// NOTE: if obj.server is nil, the server is considered to be in "absent" state.
func (obj *HetznerVMRes) getServerUpdate(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("getServerUpdate()")
	}

	name := obj.Name()
	latest, _, err := obj.client.Server.GetByName(ctx, name)
	if err != nil {
		return errwrap.Wrapf(err, "failed serverupdate request")
	}

	obj.server = latest
	return nil
}
// Cmp compares two resource structs. It returns nil if all public params of
// both resources are equal, otherwise it returns an error that identifies the
// first difference that was found.
// The rescue key lists are compared by length first and then element by
// element, so that lists of different lengths are reported as a difference
// instead of causing an out of range panic or going unnoticed.
func (obj *HetznerVMRes) Cmp(r engine.Res) error {
	// check if empty
	if obj == nil && r == nil {
		return nil
	}
	if (obj == nil) != (r == nil) {
		return fmt.Errorf("one resource is empty")
	}

	// compare types
	res, ok := r.(*HetznerVMRes)
	if !ok {
		return fmt.Errorf("not a %s", obj.Kind())
	}

	// compare resource fields
	if obj.APIToken != res.APIToken {
		return fmt.Errorf("apitoken differs")
	}
	if obj.State != res.State {
		return fmt.Errorf("state differs")
	}
	if obj.AllowRebuild != res.AllowRebuild {
		return fmt.Errorf("allowrebuild differs")
	}
	if obj.ServerType != res.ServerType {
		return fmt.Errorf("servertype differs")
	}
	if obj.Datacenter != res.Datacenter {
		return fmt.Errorf("datacenter differs")
	}
	if obj.Image != res.Image {
		return fmt.Errorf("image differs")
	}
	if obj.UserData != res.UserData {
		return fmt.Errorf("userdata differs")
	}
	if obj.ServerRescueMode != res.ServerRescueMode {
		return fmt.Errorf("serverrescuemode differs")
	}

	// The lengths must match before the lists can be compared by index.
	// TODO: more robust comparison of keylists (order is significant here)
	if len(obj.ServerRescueSSHKeys) != len(res.ServerRescueSSHKeys) {
		return fmt.Errorf("serverrescuekeys differ")
	}
	for i, key := range obj.ServerRescueSSHKeys {
		if key != res.ServerRescueSSHKeys[i] {
			return fmt.Errorf("serverrescuekeys differ")
		}
	}

	if obj.WaitInterval != res.WaitInterval {
		return fmt.Errorf("waitinterval differs")
	}
	if obj.WaitTimeout != res.WaitTimeout {
		return fmt.Errorf("waittimeout differs")
	}

	return nil
}
// cmpServerSpecs compares the server specifications between the local mcl
// struct HetznerVMRes and the corresponding server instance. It returns true
// if ServerType, Datacenter and Image all match by name. It returns an error
// if the server is absent.
// The server fields are pointers in hcloud-go, so each one is checked for nil
// before it is dereferenced. A missing field is treated as a mismatch rather
// than causing a nil pointer panic.
func (obj *HetznerVMRes) cmpServerSpecs() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("cmpServerSpecs()")
	}

	if obj.server == nil {
		return false, fmt.Errorf("server is unavailable")
	}

	if st := obj.server.ServerType; st == nil || st.Name != obj.ServerType {
		return false, nil
	}
	if dc := obj.server.Datacenter; dc == nil || dc.Name != obj.Datacenter {
		return false, nil
	}
	// NOTE(review): the API documents the server image as nullable, so this
	// is presumably the field that is most likely to be nil in practice.
	if img := obj.server.Image; img == nil || img.Name != obj.Image {
		return false, nil
	}

	return true, nil
}
// powerServerOn asks the API to power on the server, and then blocks until the
// "running" state is confirmed. It returns an error if the server is absent,
// if the poweron request fails, or if waitUntil exits early due to timeout,
// context cancellation or another error.
func (obj *HetznerVMRes) powerServerOn(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("powerServerOn()")
	}

	// An absent server can't be powered on.
	if obj.server == nil {
		return fmt.Errorf("server is unavailable")
	}

	_, _, err := obj.client.Server.Poweron(ctx, obj.server)
	if err != nil {
		return errwrap.Wrapf(err, "client.Server.Poweron failed")
	}

	// Don't return before the poweron is confirmed.
	isRunning := obj.serverStateIs(HetznerStateRunning)
	if err := obj.waitUntil(ctx, isRunning); err != nil {
		return errwrap.Wrapf(err, "waitUntil(serverStateIs(Running)) exited early")
	}
	return nil
}
// powerServerOff asks the API to power off the server, and then blocks until
// the "off" state is confirmed. It returns an error if the server is absent,
// if the poweroff request fails, or if waitUntil exits early due to timeout,
// context cancellation or another error.
func (obj *HetznerVMRes) powerServerOff(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("powerServerOff()")
	}

	// An absent server can't be powered off.
	if obj.server == nil {
		return fmt.Errorf("server is unavailable")
	}

	_, _, err := obj.client.Server.Poweroff(ctx, obj.server)
	if err != nil {
		return errwrap.Wrapf(err, "client.Server.Poweroff failed")
	}

	// Don't return before the poweroff is confirmed.
	isOff := obj.serverStateIs(HetznerStateOff)
	if err := obj.waitUntil(ctx, isOff); err != nil {
		return errwrap.Wrapf(err, "waitUntil(serverStateIs(Off)) exited early")
	}
	return nil
}
// createServer requests the creation of a new server and waits until its
// existence is confirmed. It refuses to run if a server is already known
// locally, builds the serverconfig via getServerConfig, logs a warning if that
// config carries no SSH keys, and then sends the create request.
// Errors occur when the server exists already, when building the config or the
// create request fails, or when the wait step exits early.
// NOTE: the startAfterCreate option is used to reach "running" state faster for
// two cases. When the state is specified as "running", or when the state is ""
// (undefined) and the last observed serverstatus was "running" or "starting".
// NOTE(review): that option is not set in this function, presumably it is
// handled by getServerConfig. Confirm against its implementation.
func (obj *HetznerVMRes) createServer(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("createServer()")
	}

	// Never create a second server under the same name.
	if obj.server != nil {
		return fmt.Errorf("server already exists")
	}

	err := obj.getServerConfig(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "getServerConfig failed")
	}
	if obj.serverconfig.SSHKeys == nil {
		obj.init.Logf("warning: no ssh keys registered for server creation")
	}

	_, _, err = obj.client.Server.Create(ctx, obj.serverconfig)
	if err != nil {
		return errwrap.Wrapf(err, "client.server.create failed")
	}

	// Don't return before the new server is confirmed to exist.
	exists := obj.serverStateIs(HetznerStateExists)
	if err := obj.waitUntil(ctx, exists); err != nil {
		return errwrap.Wrapf(err, "waitUntil(serverExists) exited early")
	}
	return nil
}
// deleteServer requests deletion of the server held by this resource and then
// blocks until the server is observed as "absent". It errors if no server is
// known, if the delete request fails, or if waitUntil returns an error before
// absence is confirmed. The context is handed to both the request and the wait
// step.
// NOTE: this function performs no AllowRebuild check of its own; the deletion
// is always requested when a server is present. Use with caution.
func (obj *HetznerVMRes) deleteServer(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("deleteServer()")
	}

	// There is nothing to delete without a known server instance.
	if obj.server == nil {
		return fmt.Errorf("server is already unavailable")
	}

	// Send the delete request; only the error result is of interest here.
	_, err := obj.client.Server.Delete(ctx, obj.server)
	if err != nil {
		return errwrap.Wrapf(err, "client.server.delete failed")
	}

	// Block until the server is no longer observed.
	isAbsent := obj.serverStateIs(HetznerStateAbsent)
	err = obj.waitUntil(ctx, isAbsent)
	if err != nil {
		return errwrap.Wrapf(err, "waitUntil(serverStateIs(Absent)) exited early")
	}

	return nil
}
// rebuildServer replaces the current server instance: it deletes the existing
// server and then creates a new one from the resource specifications. When no
// server is present, it returns nil if the state is "" (undefined) and an error
// for any other state. When the state is "" (undefined) and a server exists,
// the live server status is stored in lastObservedState first, so that the new
// serverconfig can be built to match it.
// NOTE: AllowRebuild guards against unexpected deletions. With
// HetznerAllowRebuildError the rebuild is refused with an error, with
// HetznerAllowRebuildIgnore it is skipped without an error, and any other value
// lets the deletion proceed.
func (obj *HetznerVMRes) rebuildServer(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("rebuildServer()")
	}

	// Without a server there is nothing to rebuild.
	if obj.server == nil {
		if obj.State != HetznerStateUndefined {
			// A defined state gives no reason to accept absence.
			return fmt.Errorf("server is unavailable")
		}
		// Leave an undefined server alone.
		return nil
	}

	// Honor the rebuild protection before anything is deleted.
	switch obj.AllowRebuild {
	case HetznerAllowRebuildError:
		// refuse to apply changes, and report it
		return fmt.Errorf("server rebuild blocked, requires deletion")
	case HetznerAllowRebuildIgnore:
		// refuse to apply changes, silently
		return nil
	}

	// For an undefined state, remember the status of the live server. This
	// value is read when the serverconfig for the new server is built.
	if obj.State == HetznerStateUndefined {
		obj.lastObservedState = obj.server.Status
	}

	// Delete the old server first, then create its replacement.
	err := obj.deleteServer(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "deleteServer failed")
	}
	err = obj.createServer(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "createServer failed")
	}

	return nil
}
// getServerConfig assembles the hcloud server creation options from the
// resource parameters and stores them in the private serverconfig field, so
// that they can be used to create a matching server instance. It looks up the
// server type, image and datacenter by name and collects all SSH keys known to
// the client. An error is returned if any of these requests fail, if one of the
// names is unknown, or if the resulting options do not pass validation.
// NOTE: the StartAfterCreate option is enabled to reach the "running" state
// faster in two cases: when the state is specified as "running", or when the
// state is "" (undefined) and the last observed server status was "running" or
// "starting".
// TODO: add option to define Location xor Datacenter (never both!).
func (obj *HetznerVMRes) getServerConfig(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("getServerConfig()")
	}

	// Volumes are not supported (yet), so automount stays disabled.
	// TODO: add support for volume selection?
	automount := false

	// Decide whether the server should be powered on right at creation.
	startAfterCreate := false
	switch obj.State {
	case HetznerStateRunning:
		startAfterCreate = true
	case HetznerStateUndefined:
		// Follow the last observed status; anything that was not running
		// or starting is left powered off.
		last := obj.lastObservedState
		startAfterCreate = last == hcloud.ServerStatusRunning || last == hcloud.ServerStatusStarting
	}

	// Collect the serverconfig elements from the API.
	srvType, _, err := obj.client.ServerType.GetByName(ctx, obj.ServerType)
	if err != nil {
		return errwrap.Wrapf(err, "failed to collect ServerType struct")
	}
	img, _, err := obj.client.Image.GetByName(ctx, obj.Image)
	if err != nil {
		return errwrap.Wrapf(err, "failed to collect Image struct")
	}
	dc, _, err := obj.client.Datacenter.GetByName(ctx, obj.Datacenter)
	if err != nil {
		return errwrap.Wrapf(err, "failed to collect Datacenter struct")
	}
	// TODO: add more flexible key selection
	keys, err := obj.client.SSHKey.All(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "failed to collect SSHKey array")
	}

	// NOTE: GetByName will return nil in case the given name is unknown.
	switch {
	case srvType == nil:
		return fmt.Errorf("unknown servertype: %s", obj.ServerType)
	case img == nil:
		return fmt.Errorf("unknown image: %s", obj.Image)
	case dc == nil:
		return fmt.Errorf("unknown datacenter: %s", obj.Datacenter)
	}

	// Build the serverconfig from the given specs. The fields that are not
	// listed (Location, Labels, Volumes, Networks, Firewalls, PlacementGroup)
	// are left at their zero value.
	obj.serverconfig = hcloud.ServerCreateOpts{
		Name:             obj.Name(),
		ServerType:       srvType,
		Image:            img,
		SSHKeys:          keys,
		Datacenter:       dc,
		UserData:         obj.UserData,
		StartAfterCreate: &startAfterCreate,
		Automount:        &automount,
	}

	// hcloud-go provides basic validation, but this can still miss problems!
	// TODO: add tests? If issues come up, add checks to Validate.
	err = obj.serverconfig.Validate()
	if err != nil {
		return errwrap.Wrapf(err, "invalid serverconfig")
	}

	return nil
}
// enableRescueMode requests rescue mode for the server held by this resource
// and then blocks until the server reports rescue mode as enabled. It returns
// nil right away if rescue mode is already enabled. An error is returned if no
// server is known, if the server is not in a steady state, if collecting the
// rescue keys or the enable request fails, or if waitUntil returns an error
// before the change is confirmed.
// NOTE: the EnableRescue request requires steady state ("off" or "running").
func (obj *HetznerVMRes) enableRescueMode(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("enableRescueMode()")
	}

	// Rescue mode can not be enabled without a known server instance.
	if obj.server == nil {
		return fmt.Errorf("server is unavailable")
	}
	// There is nothing to do if rescue mode is already on.
	if obj.server.RescueEnabled {
		return nil
	}

	// Only continue from a steady state ("running" or "off").
	// NOTE: otherwise the "server is locked" when trying to enable.
	steady, err := obj.serverInSteadyState()
	if err != nil {
		return errwrap.Wrapf(err, "serverInSteadyState failed")
	}
	if !steady {
		return fmt.Errorf("state must be 'running' or 'off' (now: %s)", obj.server.Status)
	}

	// Store the rescue image type and the rescue keys on the resource.
	// NOTE: rescueImage and rescueKeys also provide a checkapply reference.
	obj.rescueImage = hcloud.ServerRescueType(obj.ServerRescueMode)
	err = obj.getRescueKeys(ctx)
	if err != nil {
		return errwrap.Wrapf(err, "getRescueKeys failed")
	}

	// Send the enable request with the stored image type and keys.
	// NOTE: EnableRescue returns a root password, but this is ignored in favor
	// of connecting to the server in rescue mode over SSH.
	// TODO: add support for password login? SSH usually ok.
	rescueOpts := hcloud.ServerEnableRescueOpts{
		Type:    obj.rescueImage,
		SSHKeys: obj.rescueKeys,
	}
	_, _, err = obj.client.Server.EnableRescue(ctx, obj.server, rescueOpts)
	if err != nil {
		return errwrap.Wrapf(err, "client.Server.EnableRescue failed")
	}

	// Block until the server reports that rescue mode is enabled.
	err = obj.waitUntil(ctx, obj.rescueModeEnabled)
	if err != nil {
		return errwrap.Wrapf(err, "waitUntil(rescueModeEnabled) exited early")
	}

	return nil
}
// disableRescueMode tries to disable rescue mode for the server held by this
// resource, then waits until the operation is confirmed. It returns nil early
// if the server is absent or if rescue mode is already disabled. It returns an
// error if the server is not in a steady state, if the disable request fails,
// or if waitUntil exits early (timeout, context cancellation or another error).
// NOTE: an absent server is treated as a disabled serverrescuemode.
// NOTE: the DisableRescue request requires steady state ("off" or "running").
func (obj *HetznerVMRes) disableRescueMode(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("disableRescueMode()")
	}
	// Exit immediately if rescue mode is already disabled. Absence of the
	// server counts as disabled.
	if obj.server == nil || !obj.server.RescueEnabled {
		return nil
	}
	// Exit if the server is not in a steady state ("running" or "off").
	// NOTE: otherwise the "server is locked" when trying to disable.
	stateOK, err := obj.serverInSteadyState()
	if err != nil {
		return errwrap.Wrapf(err, "serverInSteadyState failed")
	}
	if !stateOK {
		return fmt.Errorf("state must be 'running' or 'off' (now: %s)", obj.server.Status)
	}
	// Disable rescue mode. The error message names the request that was
	// actually made, so that failures are not mistaken for an enable failure.
	if _, _, err := obj.client.Server.DisableRescue(ctx, obj.server); err != nil {
		return errwrap.Wrapf(err, "client.Server.DisableRescue failed")
	}
	// Wait until the rescue disable is confirmed.
	if err := obj.waitUntil(ctx, obj.rescueModeDisabled); err != nil {
		return errwrap.Wrapf(err, "waitUntil(rescueModeDisabled) exited early")
	}
	return nil
}
// getRescueKeys resolves the key names listed in ServerRescueSSHKeys into
// hcloud SSH key structs, for use when rescue mode is enabled over SSH. Each
// name is looked up with the Hetzner client. The resulting []*hcloud.SSHKey is
// stored in the private field rescueKeys. The lookup stops at the first name
// that fails or is unknown, in which case an error is returned and rescueKeys
// is left untouched.
// TODO: standardize this so that it can also be used for serverconfig keys.
func (obj *HetznerVMRes) getRescueKeys(ctx context.Context) error {
	if obj.init.Debug {
		obj.init.Logf("getRescueKeys()")
	}

	var resolved []*hcloud.SSHKey
	for i := range obj.ServerRescueSSHKeys {
		name := obj.ServerRescueSSHKeys[i]

		sshKey, _, err := obj.client.SSHKey.GetByName(ctx, name)
		switch {
		case err != nil:
			return errwrap.Wrapf(err, "SSHKey GetByName(%s) failed", name)
		case sshKey == nil:
			// The lookup succeeded, but produced no key for this name.
			return fmt.Errorf("unknown keyname: %s", name)
		}

		if obj.init.Debug {
			obj.init.Logf("appending known key: %s", name)
		}
		resolved = append(resolved, sshKey)
	}

	// Only store the list once every name has been resolved.
	obj.rescueKeys = resolved
	return nil
}
// waitUntil provides a general function that waits until the provided exit
// condition is satisfied. On every iteration it first refreshes the server info
// with getServerUpdate and then evaluates the condition. If the condition is
// not satisfied, it retries after WaitInterval seconds. It returns nil once the
// condition is satisfied. It exits early with an error if the server update or
// the condition itself fails, if WaitTimeout seconds have passed since the call
// started, or if the context is cancelled. The exit condition must check a
// well-defined condition for the resource, and return true if satisfied, false
// otherwise. The condition must check its logic without API requests, so no
// context is needed.
func (obj *HetznerVMRes) waitUntil(ctx context.Context, condition func() (bool, error)) error {
	if obj.init.Debug {
		obj.init.Logf("waitUntil()")
	}
	interval := time.Duration(obj.WaitInterval) * time.Second
	// Use explicit timers instead of time.After, so that they can be stopped
	// as soon as this function returns, rather than lingering until they fire.
	timeout := time.NewTimer(time.Duration(obj.WaitTimeout) * time.Second)
	defer timeout.Stop()
	for {
		// Get up-to-date serverinfo.
		if err := obj.getServerUpdate(ctx); err != nil {
			return errwrap.Wrapf(err, "failed serverupdate request")
		}
		// Check if the provided exit condition is satisfied.
		conditionSatisfied, err := condition()
		if err != nil {
			return errwrap.Wrapf(err, "failed to confirm exit condition")
		}
		if conditionSatisfied {
			return nil
		}
		// Retry every WaitInterval until the exit condition is satisfied.
		// Can exit early by timeout, context cancellation or an error.
		retry := time.NewTimer(interval)
		select {
		case <-retry.C:
			// retry confirmation
		case <-timeout.C:
			retry.Stop()
			return fmt.Errorf("timeout: exit condition not confirmed after %d seconds", obj.WaitTimeout)
		case <-ctx.Done():
			retry.Stop()
			return errwrap.Wrapf(ctx.Err(), "wait interrupted by context")
		}
	}
}
// serverStateConverged reports whether the observed server matches the State
// requested for this resource. An undefined state "" always counts as
// converged. Without a server, only the "absent" state is converged. With a
// server, "exists" is always converged, "running" and "off" are converged when
// the server status matches, and "absent" is not converged. Any other State
// value results in an error when a server is present.
func (obj *HetznerVMRes) serverStateConverged() (converged bool, err error) {
	if obj.init.Debug {
		obj.init.Logf("serverStateConverged()")
	}

	switch {
	case obj.State == HetznerStateUndefined:
		// nothing was requested, so anything goes
		return true, nil
	case obj.server == nil:
		// absence only matches a request for absence
		return obj.State == HetznerStateAbsent, nil
	}

	// From here on the server is known to exist.
	switch obj.State {
	case HetznerStateAbsent:
		return false, nil
	case HetznerStateExists:
		return true, nil
	case HetznerStateRunning:
		return obj.server.Status == hcloud.ServerStatusRunning, nil
	case HetznerStateOff:
		return obj.server.Status == hcloud.ServerStatusOff, nil
	}

	return false, fmt.Errorf("invalid state: %s", obj.State)
}
// serverInSteadyState reports whether the server status is one of the two
// steady states handled here, "running" or "off". An absent server and every
// other status yield false. The returned error is always nil, so callers can
// simply try again later if needed.
func (obj *HetznerVMRes) serverInSteadyState() (steady bool, err error) {
	if obj.init.Debug {
		obj.init.Logf("serverInSteadyState()")
	}

	// An absent server is never steady.
	if obj.server == nil {
		return false, nil
	}

	status := obj.server.Status
	isSteady := status == hcloud.ServerStatusRunning || status == hcloud.ServerStatusOff
	return isSteady, nil
}
// rescueModeEnabled reports whether the server exists and has rescue mode
// enabled. An absent server yields false. The returned error is always nil; the
// signature matches the condition function expected by waitUntil.
func (obj *HetznerVMRes) rescueModeEnabled() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("rescueModeEnabled()")
	}

	enabled := obj.server != nil && obj.server.RescueEnabled
	return enabled, nil
}
// rescueModeDisabled reports whether rescue mode is off for the server. An
// absent server is also considered to have rescue mode "disabled", and yields
// true. The returned error is always nil; the signature matches the condition
// function expected by waitUntil.
func (obj *HetznerVMRes) rescueModeDisabled() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("rescueModeDisabled()")
	}

	disabled := obj.server == nil || !obj.server.RescueEnabled
	return disabled, nil
}
// rescueModeConverged reports whether the rescue mode of the server matches
// the requested ServerRescueMode. Without a server, only the disabled mode is
// converged. With a server, the disabled mode is converged when rescue mode is
// off, and the rescue image modes are converged when rescue mode is on and the
// stored rescueImage equals the requested mode. Any other ServerRescueMode
// value results in an error when a server is present.
// TODO: review checks for image and ssh keys.
func (obj *HetznerVMRes) rescueModeConverged() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("rescueModeConverged()")
	}

	wanted := obj.ServerRescueMode

	// Absence is treated as a valid case of disabled rescue mode.
	if obj.server == nil {
		return wanted == HetznerServerRescueDisabled, nil
	}

	switch wanted {
	case HetznerServerRescueDisabled:
		// converged if rescue mode is off as intended
		return !obj.server.RescueEnabled, nil

	case HetznerServerRescueTypeLinux32, HetznerServerRescueTypeLinux64, HetznerServerRescueTypeFreeBSD64:
		// rescue mode must be on as intended
		if !obj.server.RescueEnabled {
			return false, nil
		}
		// the last used image type must match the specs
		// TODO: reference logic needs review
		// TODO: compare rescueKeys with ServerRescueSSHKeys?
		return obj.rescueImage == hcloud.ServerRescueType(wanted), nil
	}

	return false, fmt.Errorf("invalid ServerRescueMode: %s", obj.ServerRescueMode)
}
// serverStateIs builds a condition function for use with waitUntil. When the
// returned function is called, it compares the server currently held by the
// resource with the state given as input argument. The state "" (undefined) is
// always satisfied. Without a server, only "absent" is satisfied. With a
// server, "exists" is always satisfied, "running" and "off" are satisfied when
// the server status matches, and "absent" is not satisfied. Any other state
// argument results in an error, but only when a server is present; without a
// server it simply yields false. The debug message is logged once, when the
// condition function is built, not on every evaluation.
// NOTE: "exists" is satisfied by any present server, whatever its status. This
// includes hcloud states like ServerStatusUnknown and ServerStatusDeleting.
// This is important to take into account when rewriting or adjusting any logic
// using this function.
func (obj *HetznerVMRes) serverStateIs(state string) func() (bool, error) {
	if obj.init.Debug {
		obj.init.Logf("serverStateIs(%s)", state)
	}

	check := func() (bool, error) {
		// An undefined state is satisfied by anything.
		if state == HetznerStateUndefined {
			return true, nil
		}
		// Without a server, only absence can be satisfied.
		if obj.server == nil {
			return state == HetznerStateAbsent, nil
		}

		// The server exists, so compare against its status.
		switch state {
		case HetznerStateAbsent:
			return false, nil
		case HetznerStateExists:
			return true, nil
		case HetznerStateRunning:
			return obj.server.Status == hcloud.ServerStatusRunning, nil
		case HetznerStateOff:
			return obj.server.Status == hcloud.ServerStatusOff, nil
		}
		return false, fmt.Errorf("unsupported state: %s", state)
	}
	return check
}