// Mgmt
// Copyright (C) James Shubin and the project contributors
// Written by James Shubin and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see https://www.gnu.org/licenses/.
//
// Additional permission under GNU GPL version 3 section 7
//
// If you modify this program, or any covered work, by linking or combining it
// with embedded mcl code and modules (and that the embedded mcl code and
// modules which link with this program, contain a copy of their source code in
// the authoritative form) containing parts covered by the terms of any other
// license, the licensors of this program grant you additional permission to
// convey the resulting work. Furthermore, the licensors of this program grant
// the original author, James Shubin, additional permission to update this
// additional permission if he deems it necessary to achieve the goals of this
// additional permission.

package resources

import (
	"context"
	"fmt"
	"time"

	"github.com/purpleidea/mgmt/engine"
	"github.com/purpleidea/mgmt/engine/traits"
	"github.com/purpleidea/mgmt/util/errwrap"

	"github.com/hetznercloud/hcloud-go/hcloud"
)

const (
	// HetznerStateUndefined leaves the state undefined by default. This state
	// is always treated as converged. Changes to other params are only applied
	// when the server is in a state that is compatible with the operations
	// needed to make that change.
	HetznerStateUndefined = ""
	// HetznerStateExists indicates that the server must exist, without
	// differentiation between "off", "running" or any transient states.
	// If the server was absent, a new server is created in "off" state, with
	// one exception: if the last observed state before a rebuild was "running"
	// or "starting", rebuildServer will set the new server to "running".
	HetznerStateExists = "exists"
	// HetznerStateRunning indicates that the server must be powered on. If the
	// server was absent, a new server is created in "running" state.
	HetznerStateRunning = "running"
	// HetznerStateOff indicates that the server must be powered off. If the
	// server was absent, a new server is created in "off" state.
	HetznerStateOff = "off"
	// HetznerStateAbsent indicates that the server must be deleted/absent. If
	// the server already existed, it is deleted. Note that this deletion is
	// always executed if the "absent" state is explicitly specified!
	HetznerStateAbsent = "absent"

	// HetznerAllowRebuildError blocks any server rebuild requests in CheckApply
	// and exits with an error. These rebuild requests occur when other resource
	// params require a destructive rebuild to reach resource convergence. The
	// error option is used by default to prevent unexpected server deletions.
	HetznerAllowRebuildError = ""
	// HetznerAllowRebuildIgnore blocks any server rebuild requests in
	// CheckApply, but does not throw any errors. Instead, CheckApply must skip
	// this rebuild, and continue further steps if possible. Use this option to
	// prevent unexpected server deletions, without disrupting the mcl script.
	HetznerAllowRebuildIgnore = "ignore"
	// HetznerAllowRebuildIfNeeded allows server rebuilds within CheckApply.
	// This is needed when the specified serverspecs are not (yet) aligned with
	// the active instance. Use this option only if you are sure that you are
	// not destroying any critical data or services!
	HetznerAllowRebuildIfNeeded = "ifneeded"

	// HetznerServerRescueDisabled disables rescue mode by default.
	HetznerServerRescueDisabled = ""
	// HetznerServerRescueTypeLinux32 is used to enable rescue mode with a
	// linux32 image type.
	HetznerServerRescueTypeLinux32 = "linux32"
	// HetznerServerRescueTypeLinux64 is used to enable rescue mode with a
	// linux64 image type.
	HetznerServerRescueTypeLinux64 = "linux64"
	// HetznerServerRescueTypeFreeBSD64 is used to enable rescue mode with a
	// freebsd64 image type.
	HetznerServerRescueTypeFreeBSD64 = "freebsd64"

	// HetznerPollLimit sets a lower limit on polling interval in seconds.
	// Since the Hetzner API supports requests at up to 3600 requests per hour,
	// this limit is set to prevent rate limit errors in long term operation.
	// NOTE: polling the same Hetzner project from multiple clients will require
	// a larger polling interval to prevent the same rate limit error, since
	// these requests all add to the query count of their shared project. It is
	// recommended to use a polling interval of at least N seconds, with N the
	// number of active hetzner:vm instances of the same project.
	// NOTE: high rates of change to other params will require additional API
	// queries at CheckApply. Increase the polling interval again to prevent
	// rate limit errors if frequent updates are expected.
	HetznerPollLimit = 1
	// HetznerWaitIntervalLimit sets a lower limit on wait intervals in seconds.
	// High request rates are allowed, but risk causing rate limit errors.
	HetznerWaitIntervalLimit = 0
	// HetznerWaitIntervalDefault sets a default wait interval in seconds.
	// NOTE: use larger intervals when using many resources under the same
	// Hetzner project, or when expecting consistently high rates of change to
	// other resource parameters.
	HetznerWaitIntervalDefault = 5
	// HetznerWaitTimeoutDefault sets a default timeout limit in seconds.
	HetznerWaitTimeoutDefault = 60 * 5
)

// init registers the "hetzner:vm" resource kind with the engine, using a
// constructor that returns an empty HetznerVMRes.
func init() {
	engine.RegisterResource("hetzner:vm", func() engine.Res { return &HetznerVMRes{} })
}

// HetznerVMRes is a Hetzner cloud resource (1). It connects with the cloud API
// using the hcloud-go package provided by Hetzner. The API token for a new
// project must be generated manually, via the cloud console (2), before this
// resource can establish a connection with the API. One Hetzner resource
// represents one server instance, and multiple instances can be registered
// under the same project. A resource in the "absent" state only exists as a
// local mcl struct, and does not exist as server instance on Hetzner's side.
// NOTE: the Hetzner cloud console must be used to create a new project,
// generate the corresponding API token, and initialize the desired SSH keys.
// All registered SSH keys are used when creating a server, and a subset of
// those can be enabled for rescue mode via the "serverrescuekeys" param.
// NOTE: complete and up-to-date serverconfig options must be requested from the
// Hetzner API, but hcloud-go-getopts (3) provides a static reference.
// NOTE: this resource requires polling, via the "Meta:poll" param. The Hetzner
// API imposes a maximum rate of 3600 requests per hour that must be taken into
// account for intensive and/or long term operations. When running N hetzner:vm
// resources under the same Hetzner project, it is recommended to use a polling
// interval of at least N seconds. High rates of change to other params will
// require additional API requests at CheckApply. When frequent param updates
// are expected for long term operations, it is recommended to increase the
// polling interval again to prevent rate limit errors.
// NOTE: running multiple concurrent mcl scripts on the same resource might
// cause unexpected behavior in the API or the resource state. Use with care.
// TODO: build tests for hetzner:vm? But hcloud-go has no mocking package.
// 1) https://docs.hetzner.cloud/ // 2) https://console.hetzner.cloud/ // 3) https://github.com/jefmasereel/hcloud-go-getopts type HetznerVMRes struct { traits.Base init *engine.Init // APIToken specifies the unique API token corresponding to a Hetzner // project. Keep this token private! It provides full access to this // project, so a leaked token will be vulnerable to abuse. Read it from // a local file or the mgmt deploy, or provide it directly as a string. // NOTE: It must be generated manually via https://console.hetzner.cloud/. // NOTE: This token is usually a 64 character alphanumeric string. APIToken string `lang:"apitoken"` // State specifies the desired state of the server instance. The supported // options are "" (undefined), "absent", "exists", "off" and "running". // HetznerStateUndefined ("") leaves the state undefined by default. // HetznerStateExists ("exists") indicates that the server must exist. // HetznerStateAbsent ("absent") indicates that the server must not exist. // HetznerStateRunning ("running") tells the server it must be powered on. // HetznerStateOff ("off") tells the server it must be powered off. // NOTE: any other inputs will not pass Validate and result in an error. // NOTE: setting the state of a live server to "absent" will delete all data // and services that are located on that instance! Use with caution. State string `lang:"state"` // AllowRebuild provides flexible protection against unexpected server // rebuilds. Any changes to the "servertype", "datacenter" or "image" params // require a destructive rebuild, which deletes all data on that server. // The user must explicitly allow these operations with AllowRebuild. // Choose from three options: "ifneeded" allows all rebuilds that are needed // by CheckApply to meet the specified params. "ignore" disables these // rebuilds, but continues without error. The default option ("") disables // always returns an error when CheckApply requests a rebuild. 
// NOTE: Soft updates related to power and rescue mode are always allowed, // because they are only required for explicit changes to resource fields. // TODO: add AllowReboot if any indirect poweroffs are ever implemented. AllowRebuild string `lang:"allowrebuild"` // ServerType determines the machine type as defined by Hetzner. A complete // and up-to-date list of options must be requested from the Hetzner API, // but hcloud-go-getopts (url) provides a static reference. Basic servertype // options include "cx11", "cx21", "cx31" etc. // NOTE: make sure to check the price of the selected servertype! The listed // examples are usually very cheap, but never free. Price and availability // can also be dependent on the selected datacenter. // https://github.com/JefMasereel/hcloud-go-getopts/ // TODO: set some kind of cost-based protection policy? ServerType string `lang:"servertype"` // Datacenter determines where the resource is hosted. A complete and // up-to-date list of options must be requested from the Hetzner API, but // hcloud-go-getopts (url) provides a static reference. The datacenter // options include "nbg1-dc3", "fsn1-dc14", "hel1-dc2" etc. // https://github.com/JefMasereel/hcloud-go-getopts/ Datacenter string `lang:"datacenter"` // Image determines the operating system to be installed. A complete and // up-to-date list of options must be requested from the Hetzner API, but // hcloud-go-getopts (url) provides a static reference. The image type // options include "centos-7", "ubuntu-18.04", "debian-10" etc. // https://github.com/JefMasereel/hcloud-go-getopts/ Image string `lang:"image"` // UserData can be used to run commands on the server instance at creation. // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html. UserData string `lang:"userdata"` // ServerRescueMode specifies the image type used when enabling rescue mode. // The supported image types are "linux32", "linux64" and "freebsd64". 
// Alternatively, leave this string empty to disable rescue mode (default). // Other input values will not pass Validate and result in an error. // NOTE: rescue mode can not be enabled if the server is absent. // NOTE: Rescue mode can be used to log into the server over SSH and access // the disks when the normal OS has trouble booting on its own. ServerRescueMode string `lang:"serverrescuemode"` // ServerRescueSSHKeys can be used to select a subset of keys that should be // enabled for rescue mode operations over SSH. From all SSH keys known to // the project client, choose a subset of keys by name, as an array of // strings. New keys must first be added manually via the cloud console. // An error is thrown if a given keyname is not recognized by the client. // NOTE: live changes to this keylist while rescue mode is already enabled // are not (yet) detected or applied by CheckApply. // TODO: improve ssh key handling at checkApplyRescueMode and serverRebuild. ServerRescueSSHKeys []string `lang:"serverrescuekeys"` // WaitInterval is the interval in seconds that is used when waiting for // transient states to converge between intermediate operations. A zero // value causes the waiter to run without delays (burst requests). Although // such burst requests are allowed, it is recommended to use a wait interval // that keeps the total request rate under 3600 requests per hour. Take // these factors into account: polling rate "Meta:poll", number of active // resources under the same Hetzner project, and the expected rate of param // updates. This will help to prevent rate limit errors. WaitInterval uint32 `lang:"waitinterval"` // WaitTimeout will cancel wait loops if they do not exit cleanly before // the expected time in seconds, in order to detect defective loops and // avoid unnecessary consumption of computational resources. WaitTimeout uint32 `lang:"waittimeout"` // client is required for hcloud-go to interact with the Hetzner API. 
client *hcloud.Client // server is a local copy of the server object returned by hcloud-go. If // this is nil, the server is considered to be absent. Otherwise, this // struct describes the properties of the server instance as registered with // Hetzner at the time of the update request. server *hcloud.Server // serverconfig is a local copy of the serverCreateOpts struct generated // with hcloud-go. This struct is dependent on the ServerType, Datacenter, // Image and State params. These must be chosen from the valid options // provided by Hetzner, see details on https://docs.hetzner.cloud/. serverconfig hcloud.ServerCreateOpts // lastObservedState is a local copy of the last observed state of the // resource. This is used to determine the startAfterCreate option during // server rebuilds when the state is "" (undefined). lastObservedState hcloud.ServerStatus // rescueKeys is a local copy of the array of SSH key values to be enabled // in rescue mode, after formatting for direct use with hcloud-go. rescueKeys []*hcloud.SSHKey // rescueImage is a local copy of the image type used when rescue mode was // enabled the last time, to give checkapplyrescuemode a static reference. rescueImage hcloud.ServerRescueType } // Default returns some conservative defaults for this resource. func (obj *HetznerVMRes) Default() engine.Res { return &HetznerVMRes{ State: HetznerStateUndefined, AllowRebuild: HetznerAllowRebuildError, WaitInterval: HetznerWaitIntervalDefault, WaitTimeout: HetznerWaitTimeoutDefault, } } // Validate if the given param values are valid. func (obj *HetznerVMRes) Validate() error { // check for empty token if obj.APIToken == "" { return fmt.Errorf("empty token string") } // validate state param switch obj.State { case HetznerStateRunning, HetznerStateOff, HetznerStateAbsent: // Valid: the server is in a well defined steady state. case HetznerStateExists: // Valid: the server exists (on, off or transient state). 
case HetznerStateUndefined: // Valid: the server state is left undefined (default). default: return fmt.Errorf("invalid state: %s", obj.State) } // validate allowrebuild switch obj.AllowRebuild { case HetznerAllowRebuildError, HetznerAllowRebuildIgnore, HetznerAllowRebuildIfNeeded: // ok default: return fmt.Errorf("invalid allowrebuild: %s", obj.AllowRebuild) } // validate rescue mode parameters switch obj.ServerRescueMode { case HetznerServerRescueTypeLinux32, HetznerServerRescueTypeLinux64, HetznerServerRescueTypeFreeBSD64: // valid options for rescue mode image case HetznerServerRescueDisabled: // valid option to disable rescue mode default: return fmt.Errorf("invalid serverrescuemode: %s", obj.ServerRescueMode) } // validate time params if obj.MetaParams().Poll < HetznerPollLimit { return fmt.Errorf("invalid polling interval (minimum %d s)", HetznerPollLimit) } if obj.WaitInterval < HetznerWaitIntervalLimit { return fmt.Errorf("invalid wait interval (minimum %d)", HetznerWaitIntervalLimit) } return nil } // Init runs some startup code for this resource: initialize hcloud-go client, // and then build some internal flags from the given public fields. 
func (obj *HetznerVMRes) Init(init *engine.Init) error { // save init struct obj.init = init // initialize hcloud-go client obj.client = hcloud.NewClient( hcloud.WithToken(obj.APIToken), hcloud.WithApplication(obj.init.Program, obj.init.Version), // TODO: hcloud.WithEndpoint(), // TODO: hcloud.WithDebugWriter(), ) // warn user about AllowRebuild setting switch obj.AllowRebuild { case HetznerAllowRebuildError: obj.init.Logf("warning: server rebuild requests will be blocked with error") case HetznerAllowRebuildIgnore: obj.init.Logf("warning: server rebuild requests will be skipped without error") case HetznerAllowRebuildIfNeeded: obj.init.Logf("warning: server rebuild requests will be applied without error") } // warn user about late serverconfig validation obj.init.Logf("warning: serverconfig options will only be validated during checkapply") // warn user about timing requirements obj.init.Logf("warning: Meta:poll must always be greater or equal than %d seconds", HetznerPollLimit) obj.init.Logf("warning: waitinterval must always be greater or equal than %d seconds", HetznerWaitIntervalLimit) return nil } // Cleanup is run by the engine to clean up after the resource is done. It // deletes the authentication info before closing the resource. func (obj *HetznerVMRes) Cleanup() error { obj.APIToken = "" obj.client = nil return nil } // Watch is not implemented for this resource, since the Hetzner API does not // provide any event streams. Instead, always use polling. // NOTE: HetznerPollLimit sets an explicit minimum on the polling interval. func (obj *HetznerVMRes) Watch(context.Context) error { return fmt.Errorf("invalid Watch call: requires poll metaparam") } // CheckApply checks the resource state and determines what needs to happen for // the HetznerVM resource to converge. It only applies the necessary changes if // the bool apply is true. If the resource requires changes, CheckApply returns // false regardless of the apply value, true otherwise. 
Any errors that might // occur are wrapped and returned. // NOTE: all functions that push changes to the Hetzner instance run a waitUntil // call with the appropriate exit condition before returning, such that the // requested operation is confirmed before continuing. This ensures that the // "server" struct always contains up-to-date info of the live instance. // NOTE: this last assumption might still fail in case the same resource // instance is managed by multiple running mgmt instances! // TODO: possible to ensure safe concurrency? func (obj *HetznerVMRes) CheckApply(ctx context.Context, apply bool) (bool, error) { checkOK := true // Request up-to-date server info from the API. if err := obj.getServerUpdate(ctx); err != nil { return false, errwrap.Wrapf(err, "getServerUpdate failed") } // Try to get the server in the correct state (if it is not already there). // NOTE: in case of undefined state, this always returns (true, nil). if c, err := obj.checkApplyServerState(ctx, apply); err != nil { return false, errwrap.Wrapf(err, "checkApplyServerState failed") } else if !c { checkOK = false } // If the intended state was not reached, exit here. // NOTE: this prevents unnecessary checks and operations. // NOTE: undefined state will pass! Further steps are applied if possible. if stateOK, err := obj.serverStateConverged(); err != nil { return false, errwrap.Wrapf(err, "serverStateConverged failed") } else if !stateOK { return false, nil } // Changes in cpu, location and/or image require a server rebuild. // NOTE: these changes are only applied if the server exists. if c, err := obj.checkApplyServerRebuild(ctx, apply); err != nil { return false, errwrap.Wrapf(err, "checkApplyServerRebuild failed") } else if !c { checkOK = false } // Changes in rescue mode can be made without a destructive rebuild. // NOTE: these changes are only applied if the server is running. 
if c, err := obj.checkApplyRescueMode(ctx, apply); err != nil { return false, errwrap.Wrapf(err, "checkApplyRescueMode failed") } else if !c { checkOK = false } return checkOK, nil } // checkApplyServerState tries to get the server in the correct state. If it is // already there (converged), no changes are applied. In case of the undefined // state, this function immediately returns (true, nil). Otherwise, it powers // the server on/off, creates a new instance, or deletes the existing one as // needed to reach the specified state. // NOTE: the output arguments follow the rules of CheckApply: If the resource // requires changes, CheckApply returns false regardless of the apply value, // true otherwise. Any errors that might occur are wrapped and returned. func (obj *HetznerVMRes) checkApplyServerState(ctx context.Context, apply bool) (bool, error) { if obj.init.Debug { obj.init.Logf("checkApplyServerState(apply: %t)", apply) } // Exit immediately if the server state is undefined. if obj.State == HetznerStateUndefined { return true, nil } // Make sure the server exists as intended before further checks. serverCreationRequired := false if obj.server == nil { // The server doesn't exist as intended (state = "absent"). if obj.State == HetznerStateAbsent { return true, nil } // Otherwise, the server should exist, but doesn't (yet). serverCreationRequired = true if !apply { return false, nil } // Request the creation of a new server. if err := obj.createServer(ctx); err != nil { return false, errwrap.Wrapf(err, "createServer failed") } } // If the resource only needs to exist, exit here. if obj.State == HetznerStateExists { return !serverCreationRequired, nil } // Otherwise, continue if/once the resource is in a steady state. if err := obj.waitUntil(ctx, obj.serverInSteadyState); err != nil { return false, errwrap.Wrapf(err, "waitUntil(serverInSteadyState) exited early") } // If the state has already converged, exit here. 
stateConverged, err := obj.serverStateConverged() if err != nil { return false, errwrap.Wrapf(err, "serverStateConverged failed") } if checkOK := (stateConverged && !serverCreationRequired); stateConverged { return checkOK, nil } // Otherwise, the server is in a steady state, but not the right one. if !apply { return false, nil } // Apply the necessary changes to get to the specified state. switch obj.State { case HetznerStateRunning: if err := obj.powerServerOn(ctx); err != nil { return false, errwrap.Wrapf(err, "powerServerOn failed") } case HetznerStateOff: if err := obj.powerServerOff(ctx); err != nil { return false, errwrap.Wrapf(err, "powerServerOff failed") } case HetznerStateAbsent: if err := obj.deleteServer(ctx); err != nil { return false, errwrap.Wrapf(err, "deleteServer failed") } default: return false, fmt.Errorf("invalid state: %s", obj.State) } // All required state changes were applied without error. return false, nil } // checkApplyServerRebuild checks the servertype, datacenter and image values of // the live instance, and tries to rebuild the server when that is required to // match the specified params. // NOTE: AllowRebuild protects the user against unexpected server deletions. // NOTE: the output arguments follow the rules of CheckApply: If the resource // requires changes, CheckApply returns false regardless of the apply value, // true otherwise. Any errors that might occur are wrapped and returned. func (obj *HetznerVMRes) checkApplyServerRebuild(ctx context.Context, apply bool) (bool, error) { if obj.init.Debug { obj.init.Logf("checkApplyServerRebuild(apply: %t)", apply) } // Exit immediately if the server does not exist. if obj.server == nil { return false, nil } // Compare ServerType, Datacenter and Image params. 
specsOK, err := obj.cmpServerSpecs() if err != nil { return false, errwrap.Wrapf(err, "cmpServerSpecs failed") } if specsOK { return true, nil } if !apply { return false, nil } // Rebuild the server to meet specs (if AllowRebuild passes). // NOTE: if "undefined", this tries to match the last observed state. if err := obj.rebuildServer(ctx); err != nil { return false, errwrap.Wrapf(err, "rebuildServer failed") } return false, nil } // checkApplyRescueMode checks if the rescue mode is enabled (or disabled) as // intended, and tries to disable (or enable) the rescue mode if needed to meet // the specified parameters. When enabling rescue mode, the SSH keys specified // by ServerRescueSSHKeys are validated and enabled for rescue login over SSH. // NOTE: rescue mode changes require steady state ("off" or "running"). // NOTE: the output arguments follow the rules of CheckApply: If the resource // requires changes, CheckApply returns false regardless of the apply value, // true otherwise. Any errors that might occur are wrapped and returned. // NOTE: switching image type in ServerRescueMode triggers this checkapply, but // dynamic changes to the SSH keys are not yet supported. // TODO: add "undefined" option for HetznerServerRescueMode? default? // TODO: add support for rescue login via root password? func (obj *HetznerVMRes) checkApplyRescueMode(ctx context.Context, apply bool) (bool, error) { if obj.init.Debug { obj.init.Logf("checkApplyRescueMode(apply: %t)", apply) } // Exit immediately if the server is absent. // NOTE: an absent server is treated as rescue mode "disabled". if obj.server == nil { if obj.ServerRescueMode == HetznerServerRescueDisabled { return true, nil } return false, nil } // Exit if the server is not in a steady state ("running" or "off"). // NOTE: otherwise the "server is locked" when trying to enable or disable. 
stateOK, err := obj.serverInSteadyState() if err != nil { return false, errwrap.Wrapf(err, "serverInSteadyState failed") } if !stateOK { return false, nil } // Exit if rescue mode is already in the intended configuration. // TODO: add check for ssh keys? Only checking rescueImage. rescueModeOK, err := obj.rescueModeConverged() if err != nil { return false, errwrap.Wrapf(err, "rescueModeConverged failed") } if rescueModeOK { return true, nil } if !apply { return false, nil } // Disable rescue mode to match specs, or to re-enable with new image type. if err := obj.disableRescueMode(ctx); err != nil { return false, errwrap.Wrapf(err, "disableRescueMode failed") } // Enable rescue mode if specified. if obj.ServerRescueMode != HetznerServerRescueDisabled { if err := obj.enableRescueMode(ctx); err != nil { return false, errwrap.Wrapf(err, "enableRescueMode failed") } } return false, nil } // getServerUpdate pings the Hetzner API for up-to-date server info. // NOTE: if obj.server is nil, the server is considered to be in "absent" state. func (obj *HetznerVMRes) getServerUpdate(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("getServerUpdate()") } server, _, err := obj.client.Server.GetByName(ctx, obj.Name()) if err != nil { return errwrap.Wrapf(err, "failed serverupdate request") } obj.server = server return nil } // Cmp compares two resource structs. Returns nil if the comparison holds true, // otherwise an error is thrown to identify the difference. 
func (obj *HetznerVMRes) Cmp(r engine.Res) error { // check if empty if obj == nil && r == nil { return nil } if (obj == nil) != (r == nil) { return fmt.Errorf("one resource is empty") } // compare types res, ok := r.(*HetznerVMRes) if !ok { return fmt.Errorf("not a %s", obj.Kind()) } // compare resource fields if obj.APIToken != res.APIToken { return fmt.Errorf("apitoken differs") } if obj.State != res.State { return fmt.Errorf("state differs") } if obj.AllowRebuild != res.AllowRebuild { return fmt.Errorf("allowrebuild differs") } if obj.ServerType != res.ServerType { return fmt.Errorf("servertype differs") } if obj.Datacenter != res.Datacenter { return fmt.Errorf("datacenter differs") } if obj.Image != res.Image { return fmt.Errorf("image differs") } if obj.UserData != res.UserData { return fmt.Errorf("userdata differs") } if obj.ServerRescueMode != res.ServerRescueMode { return fmt.Errorf("serverrescuemode differs") } // TODO: more robust comparison of keylists for i, key := range obj.ServerRescueSSHKeys { if key != res.ServerRescueSSHKeys[i] { return fmt.Errorf("serverrescuekeys differ") } } if obj.WaitInterval != res.WaitInterval { return fmt.Errorf("waitinterval differs") } if obj.WaitTimeout != res.WaitTimeout { return fmt.Errorf("waittimeout differs") } return nil } // cmpServerSpecs compares the server specifications between the local mcl // struct HetznerVMRes and the corresponding server instance. Returns true if // ServerType, Datacenter and Image match. Returns an error if the server is // absent. 
func (obj *HetznerVMRes) cmpServerSpecs() (bool, error) { if obj.init.Debug { obj.init.Logf("cmpServerSpecs()") } if obj.server == nil { return false, fmt.Errorf("server is unavailable") } if obj.ServerType != obj.server.ServerType.Name { return false, nil } if obj.Datacenter != obj.server.Datacenter.Name { return false, nil } if obj.Image != obj.server.Image.Name { return false, nil } return true, nil } // powerServerOn requests a poweron for the specified server, then waits until // the new "running" state is confirmed. Returns an error if the specified // server is absent, or if waitUntil exits early due to timeout, context // cancellation or another error. func (obj *HetznerVMRes) powerServerOn(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("powerServerOn()") } if obj.server == nil { return fmt.Errorf("server is unavailable") } if _, _, err := obj.client.Server.Poweron(ctx, obj.server); err != nil { return errwrap.Wrapf(err, "client.Server.Poweron failed") } // Wait until the poweron is confirmed, error otherwise. if err := obj.waitUntil(ctx, obj.serverStateIs(HetznerStateRunning)); err != nil { return errwrap.Wrapf(err, "waitUntil(serverStateIs(Running)) exited early") } return nil } // powerServerOff requests a poweroff for the specified server, then waits until // the new "off" state is confirmed. Returns an error if the specified server is // absent, or if waitUntil exits early due to timeout, context cancellation or // another error. func (obj *HetznerVMRes) powerServerOff(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("powerServerOff()") } if obj.server == nil { return fmt.Errorf("server is unavailable") } if _, _, err := obj.client.Server.Poweroff(ctx, obj.server); err != nil { return errwrap.Wrapf(err, "client.Server.Poweroff failed") } // Wait until the poweroff is confirmed, error otherwise. 
if err := obj.waitUntil(ctx, obj.serverStateIs(HetznerStateOff)); err != nil { return errwrap.Wrapf(err, "waitUntil(serverStateIs(Off)) exited early") } return nil } // createServer checks if the servername does not already exists, builds the // serverconfig in hcloud-go format from resource params, requests a server // creation with that configuration, and waits until the creation is confirmed. // Errors occur when the server exists already, the client fails, or the wait // step exits early due context cancellation, client failure or timeout. // NOTE: the startAfterCreate option is used to reach "running" state faster for // two cases. When the state is specified as "running", or when the state is "" // (undefined) and the last observed serverstatus was "running" or "starting". func (obj *HetznerVMRes) createServer(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("createServer()") } if obj.server != nil { return fmt.Errorf("server already exists") } if err := obj.getServerConfig(ctx); err != nil { return errwrap.Wrapf(err, "getServerConfig failed") } if obj.serverconfig.SSHKeys == nil { obj.init.Logf("warning: no ssh keys registered for server creation") } if _, _, err := obj.client.Server.Create(ctx, obj.serverconfig); err != nil { return errwrap.Wrapf(err, "client.server.create failed") } if err := obj.waitUntil(ctx, obj.serverStateIs(HetznerStateExists)); err != nil { return errwrap.Wrapf(err, "waitUntil(serverExists) exited early") } return nil } // deleteServer checks if the server is available from the client, requests a // server deletion from the API, waits for confirmation and then returns. It // returns an error when the server is already absent or something fails. // Context cancellation allows a clean exit when needed. // NOTE: a direct deleteServer call is never blocked. Use with caution. 
func (obj *HetznerVMRes) deleteServer(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("deleteServer()") } if obj.server == nil { return fmt.Errorf("server is already unavailable") } if _, err := obj.client.Server.Delete(ctx, obj.server); err != nil { return errwrap.Wrapf(err, "client.server.delete failed") } if err := obj.waitUntil(ctx, obj.serverStateIs(HetznerStateAbsent)); err != nil { return errwrap.Wrapf(err, "waitUntil(serverStateIs(Absent)) exited early") } return nil } // rebuildServer deletes the current server instance and creates a new one, in // accordance with the provided resource specifications. If the state is "" // (undefined), this function tries to match the last observed state of the live // instance. If that last observed state is "absent", rebuild returns nil // without creating a new server. Otherwise, the server must exist, and absence // will result in an error. // NOTE: AllowRebuild protects the user against unexpected server deletions: // AllowRebuildError blocks deletion with error, AllowRebuildIgnore blocks // deletion without error, and HetznerAllowRebuildIfNeeded allows deletion. func (obj *HetznerVMRes) rebuildServer(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("rebuildServer()") } // Exit immediately if the server is absent. if obj.server == nil { // Leave undefined server as is, rebuild if/when it becomes available. if obj.State == HetznerStateUndefined { return nil } // Otherwise there is no reason to allow absence. return fmt.Errorf("server is unavailable") } // Exit if rebuild is not allowed. if obj.AllowRebuild == HetznerAllowRebuildError { // exit without applying changes, throw error return fmt.Errorf("server rebuild blocked, requires deletion") } if obj.AllowRebuild == HetznerAllowRebuildIgnore { // exit without applying changes, but no error return nil } // If the server exists but is undefined, save a temporary copy of the last // observed state. 
This will be used to create the appropriate serverconfig. if obj.State == HetznerStateUndefined { obj.lastObservedState = obj.server.Status } // Rebuild. if err := obj.deleteServer(ctx); err != nil { return errwrap.Wrapf(err, "deleteServer failed") } if err := obj.createServer(ctx); err != nil { return errwrap.Wrapf(err, "createServer failed") } return nil } // getServerConfig builds a serverconfig struct based on the given resource // parameters, such that this serverconfig can be used to create a new server // instance that matches the specified parameters. Errors can occur if the // params used to construct serverconfig contain invalid arguments, or if the // client fails. // NOTE: the startAfterCreate option is used to reach "running" state faster for // two cases. When the state is specified as "running", or when the state is "" // (undefined) and the last observed serverstatus was "running" or "starting". // TODO: add option to define Location xor Datacenter (never both!). func (obj *HetznerVMRes) getServerConfig(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("getServerConfig()") } // default, volumes not supported (yet) // TODO: add support for volume selection? 
automount := false // poweron at creation to reach "running" faster startAfterCreate := false if obj.State == HetznerStateRunning { startAfterCreate = true } if obj.State == HetznerStateUndefined { switch obj.lastObservedState { case hcloud.ServerStatusRunning, hcloud.ServerStatusStarting: startAfterCreate = true default: // leave powered off } } // collect serverconfig elements serverType, _, err := obj.client.ServerType.GetByName(ctx, obj.ServerType) if err != nil { return errwrap.Wrapf(err, "failed to collect ServerType struct") } image, _, err := obj.client.Image.GetByName(ctx, obj.Image) if err != nil { return errwrap.Wrapf(err, "failed to collect Image struct") } datacenter, _, err := obj.client.Datacenter.GetByName(ctx, obj.Datacenter) if err != nil { return errwrap.Wrapf(err, "failed to collect Datacenter struct") } // TODO: add more flexible key selection keylist, err := obj.client.SSHKey.All(ctx) if err != nil { return errwrap.Wrapf(err, "failed to collect SSHKey array") } // NOTE: GetByName will return nil in case the given name is unknown. if serverType == nil { return fmt.Errorf("unknown servertype: %s", obj.ServerType) } if image == nil { return fmt.Errorf("unknown image: %s", obj.Image) } if datacenter == nil { return fmt.Errorf("unknown datacenter: %s", obj.Datacenter) } // build serverconfig from given specs & defaults obj.serverconfig = hcloud.ServerCreateOpts{ Name: obj.Name(), // string ServerType: serverType, // *ServerType Image: image, // *Image SSHKeys: keylist, // []*SSHKey Location: nil, // *Location Datacenter: datacenter, // *Datacenter UserData: obj.UserData, // string StartAfterCreate: &startAfterCreate, // *bool Labels: nil, // map[string]string Automount: &automount, // *bool Volumes: nil, // []*Volume Networks: nil, // []*Network Firewalls: nil, // []*ServerCreateFirewall PlacementGroup: nil, // *PlacementGroup } // hcloud-go provides basic validation, but this can still miss problems! // TODO: add tests? 
If issues come up, add checks to Validate. if err := hcloud.ServerCreateOpts.Validate(obj.serverconfig); err != nil { return errwrap.Wrapf(err, "invalid serverconfig") } return nil } // enableRescueMode tries to enable rescue mode for the specified server, then // waits until the operation is confirmed. Returns an error if the server is not // in steady state, if an intermediate API request fails, if waitUntil exits // early or in case of context cancellation. // NOTE: the EnableRescue request requires steady state ("off" or "running"). func (obj *HetznerVMRes) enableRescueMode(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("enableRescueMode()") } // Exit immediately if the server is absent. if obj.server == nil { return fmt.Errorf("server is unavailable") } // Exit if rescue mode is already enabled. if obj.server.RescueEnabled { return nil } // Exit if the server is not in a steady state ("running" or "off"). // NOTE: otherwise the "server is locked" when trying to enable. stateOK, err := obj.serverInSteadyState() if err != nil { return errwrap.Wrapf(err, "serverInSteadyState failed") } if !stateOK { return fmt.Errorf("state must be 'running' or 'off' (now: %s)", obj.server.Status) } // Format rescueImage and rescueKeys, then enable rescue mode. // NOTE: rescueImage and rescueKeys also provide a checkapply reference. obj.rescueImage = hcloud.ServerRescueType(obj.ServerRescueMode) if err := obj.getRescueKeys(ctx); err != nil { return errwrap.Wrapf(err, "getRescueKeys failed") } opts := hcloud.ServerEnableRescueOpts{ Type: obj.rescueImage, SSHKeys: obj.rescueKeys, } if _, _, err := obj.client.Server.EnableRescue(ctx, obj.server, opts); err != nil { return errwrap.Wrapf(err, "client.Server.EnableRescue failed") } // NOTE: EnableRescue returns a root password, but this is ignored in favor // of connecting to the server in rescue mode over SSH. // TODO: add support for password login? SSH usually ok. // Wait until the rescue enable is confirmed. 
if err := obj.waitUntil(ctx, obj.rescueModeEnabled); err != nil { return errwrap.Wrapf(err, "waitUntil(rescueModeEnabled) exited early") } return nil } // disableRescueMode tries to disable rescue mode for the specified server, then // waits until the operation is confirmed. It returns early if the rescue mode // is already disabled. Returns an error if an intermediate API request fails, // if waitUntil exits early, or in case of context cancellation. // NOTE: an absent server is treated as a disabled serverrescuemode. // NOTE: the DisableRescue request requires steady state ("off" or "running"). func (obj *HetznerVMRes) disableRescueMode(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("disableRescueMode()") } // Exit immediately if rescue mode is already disabled. if obj.server == nil { return nil } if !obj.server.RescueEnabled { return nil } // Exit if the server is not in a steady state ("running" or "off"). // NOTE: otherwise the "server is locked" when trying to enable. stateOK, err := obj.serverInSteadyState() if err != nil { return errwrap.Wrapf(err, "serverInSteadyState failed") } if !stateOK { return fmt.Errorf("state must be 'running' or 'off' (now: %s)", obj.server.Status) } // Disable rescue mode. if _, _, err := obj.client.Server.DisableRescue(ctx, obj.server); err != nil { return errwrap.Wrapf(err, "client.Server.EnableRescue failed") } // Wait until the rescue disable is confirmed. if err := obj.waitUntil(ctx, obj.rescueModeDisabled); err != nil { return errwrap.Wrapf(err, "waitUntil(rescueModeDisabled) exited early") } return nil } // getRescueKeys builds a list of keys to be enabled for rescue mode over SSH. // ServerRescueSSHKeys provides the selected keys as []string by name. The // corresponding data is collected with the Hetzner client (if valid). The // resulting keylist is formatted as []*hcloud.SSHKey for use with hcloud, and // saved for later use in private field rescueKeys. 
// TODO: standardize this so that it can also be used for serverconfig keys. func (obj *HetznerVMRes) getRescueKeys(ctx context.Context) error { if obj.init.Debug { obj.init.Logf("getRescueKeys()") } var keylist []*hcloud.SSHKey for _, keyname := range obj.ServerRescueSSHKeys { key, _, err := obj.client.SSHKey.GetByName(ctx, keyname) if err != nil { return errwrap.Wrapf(err, "SSHKey GetByName(%s) failed", keyname) } if key == nil { return fmt.Errorf("unknown keyname: %s", keyname) } if obj.init.Debug { obj.init.Logf("appending known key: %s", keyname) } keylist = append(keylist, key) } obj.rescueKeys = keylist return nil } // waitUntil provides a general function that waits until the provided exit // condition is satisfied. It retries every WaitInterval until the condition is // satisfied. It can exit early in case the WaitTimeout is reached, the context // is cancelled or an error occurs. Otherwise it returns nil once the condition // is satisfied. The exit condition must check a well-defined condition for the // resource, and return true if satisfied, false otherwise. The condition must // check its logic without API requests, so no context is needed. func (obj *HetznerVMRes) waitUntil(ctx context.Context, condition func() (bool, error)) error { if obj.init.Debug { obj.init.Logf("waitUntil()") } timeout := time.After(time.Duration(obj.WaitTimeout) * time.Second) for { // Get up-to-date serverinfo. if err := obj.getServerUpdate(ctx); err != nil { return errwrap.Wrapf(err, "failed serverupdate request") } // Check if the provided exit condition is satisfied. conditionSatisfied, err := condition() if err != nil { return errwrap.Wrapf(err, "failed to confirm exit condition") } if conditionSatisfied { return nil } // Retry every WaitInterval until the exit condition is satisfied. // Can exit early by timeout, context cancellation or an error. 
select { case <-time.After(time.Duration(obj.WaitInterval) * time.Second): // retry confirmation case <-timeout: return fmt.Errorf("timeout: exit condition not confirmed after %d seconds", obj.WaitTimeout) case <-ctx.Done(): return errwrap.Wrapf(ctx.Err(), "wait interrupted by context") } } } // serverStateConverged checks if the target server is in the desired state. // Returns true if the client confirms that the state is "exists", "running", // "off" or "absent" as intended. An undefined state "" always returns true. // Otherwise, this function returns false. Invalid states result in an error. func (obj *HetznerVMRes) serverStateConverged() (converged bool, err error) { if obj.init.Debug { obj.init.Logf("serverStateConverged()") } // always return true if undefined if obj.State == HetznerStateUndefined { return true, nil } // return true if absent as intended if obj.server == nil { if obj.State == HetznerStateAbsent { return true, nil } return false, nil } // convergence cases if the server exists switch obj.State { case HetznerStateAbsent: // false, nil case HetznerStateExists: converged = true case HetznerStateRunning: converged = (obj.server.Status == hcloud.ServerStatusRunning) case HetznerStateOff: converged = (obj.server.Status == hcloud.ServerStatusOff) default: err = fmt.Errorf("invalid state: %s", obj.State) } return converged, err } // serverInSteadyState returns true if the server is in one of the two known // steady states, i.e. "running" or "off", and false otherwise. Any other states // are either transients or "absent", so it is safe to return false without // errors and try again later if needed. 
func (obj *HetznerVMRes) serverInSteadyState() (steady bool, err error) { if obj.init.Debug { obj.init.Logf("serverInSteadyState()") } if obj.server == nil { return false, nil } switch obj.server.Status { case hcloud.ServerStatusRunning, hcloud.ServerStatusOff: return true, nil default: return false, nil } } // rescueModeEnabled returns true if rescue mode is enabled, false otherwise. func (obj *HetznerVMRes) rescueModeEnabled() (bool, error) { if obj.init.Debug { obj.init.Logf("rescueModeEnabled()") } if obj.server == nil { return false, nil } if obj.server.RescueEnabled { return true, nil } return false, nil } // rescueModeDisabled returns true if rescue mode is disabled, false otherwise. // Server absence is also considered to "disable" rescue mode, and returns true. func (obj *HetznerVMRes) rescueModeDisabled() (bool, error) { if obj.init.Debug { obj.init.Logf("rescueModeDisabled()") } if obj.server == nil { return true, nil } if obj.server.RescueEnabled { return false, nil } return true, nil } // rescueModeConverged returns true if the server's rescue mode is enabled or // disabled as intended, false otherwise. Absence is treated as a valid case of // disabled rescue mode. An error can only occur for invalid rescue images. // TODO: review checks for image and ssh keys. 
func (obj *HetznerVMRes) rescueModeConverged() (bool, error) { if obj.init.Debug { obj.init.Logf("rescueModeConverged()") } // check server existence if obj.server == nil { if obj.ServerRescueMode == HetznerServerRescueDisabled { return true, nil } return false, nil } // check rescue mode switch obj.ServerRescueMode { case HetznerServerRescueDisabled: // check if disabled as intended if obj.server.RescueEnabled { return false, nil } case HetznerServerRescueTypeLinux32, HetznerServerRescueTypeLinux64, HetznerServerRescueTypeFreeBSD64: // check if enabled as intended if !obj.server.RescueEnabled { return false, nil } // check if the last used image type matches specs // TODO: reference logic needs review if obj.rescueImage != hcloud.ServerRescueType(obj.ServerRescueMode) { return false, nil } // check if the last used keyset matches specs // TODO: compare rescueKeys with ServerRescueSSHKeys? default: return false, fmt.Errorf("invalid ServerRescueMode: %s", obj.ServerRescueMode) } return true, nil } // serverStateIs returns a function that can be used with waitUntil. When this // function is called, it returns true if the server status matches the state // specified as input argument, false otherwise. It also returns false if the // state argument is not supported. The supported states are "absent", "exists", // "running", "off" and "" (undefined). Other inputs will result in an error. // NOTE: hcloud states like ServerStatusUnknown and ServerStatusDeleting are // also considered to be valid for state "exists". This is important to take // into account when rewriting or adjusting any logic using this function. func (obj *HetznerVMRes) serverStateIs(state string) func() (bool, error) { if obj.init.Debug { obj.init.Logf("serverStateIs(%s)", state) } return func() (bool, error) { // Undefined state is always true. if state == HetznerStateUndefined { return true, nil } // Exit if the server is absent. 
if obj.server == nil { if state == HetznerStateAbsent { return true, nil } return false, nil } // The server exists, but in the right state? switch state { case HetznerStateAbsent: return false, nil case HetznerStateExists: return true, nil case HetznerStateRunning: if obj.server.Status == hcloud.ServerStatusRunning { return true, nil } case HetznerStateOff: if obj.server.Status == hcloud.ServerStatusOff { return true, nil } default: return false, fmt.Errorf("unsupported state: %s", state) } return false, nil } }