diff --git a/docs/resource-guide.md b/docs/resource-guide.md index d961da62..fdc21060 100644 --- a/docs/resource-guide.md +++ b/docs/resource-guide.md @@ -108,10 +108,15 @@ opened in the `Init` method and were using throughout the resource. ```golang // Close runs some cleanup code for this resource. func (obj *FooRes) Close() error { + err := obj.conn.Close() // close some internal connection - obj.Conn.Close() // ignore error in this case - - return obj.BaseRes.Close() // call base close, b/c we're overriding + // call base close, b/c we're overriding + if e := obj.BaseRes.Close(); err == nil { + err = e + } else if e != nil { + err = multierr.Append(err, e) // list of errors + } + return err } ``` diff --git a/examples/virt4.yaml b/examples/virt4.yaml index efa5fb03..ca3b0752 100644 --- a/examples/virt4.yaml +++ b/examples/virt4.yaml @@ -3,7 +3,13 @@ graph: mygraph resources: virt: - name: mgmt4 + meta: + limit: .inf + burst: 0 uri: 'qemu:///session' + cpus: 1 + maxcpus: 4 + memory: 524288 boot: - hd disk: diff --git a/misc/delta-cpu.sh b/misc/delta-cpu.sh new file mode 100755 index 00000000..30fd2435 --- /dev/null +++ b/misc/delta-cpu.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# shitty cpu count control, useful for live demos + +minimum=1 # don't decrease below this number of cpus +maximum=8 # don't increase above this number of cpus +count=1 # initial count +factor=3 +function output() { +count=$1 # arg! +cat << EOF > ~/code/mgmt/examples/virt4.yaml +--- +graph: mygraph +resources: + virt: + - name: mgmt4 + meta: + limit: .inf + burst: 0 + uri: 'qemu:///session' + cpus: $count + maxcpus: $maximum + memory: 524288 + boot: + - hd + disk: + - type: qcow2 + source: "~/.local/share/libvirt/images/fedora-23-scratch.qcow2" + state: running + transient: false +edges: [] +comment: "qemu-img create -b fedora-23.qcow2 -f qcow2 fedora-23-scratch.qcow2" +EOF +} +#tput cuu 1 && tput el # remove last line +str='' +tnuoc=$((maximum-count)) # backwards count +count2=$((count * factor)) +tnuoc2=$((tnuoc * factor)) +left=`yes '>' | head -$count2 | paste -s -d '' -` +right=`yes ' ' | head -$tnuoc2 | paste -s -d '' -` +str="${left}${right}" +_min=$((minimum-1)) +_max=$((maximum+1)) +reset # clean up once... +output $count # call function +while true; do + + read -n1 -r -s -p "CPUs count is: $count; ${str} Press +/- key to adjust." key + if [ "$key" = "q" ] || [ "$key" = "Q" ]; then + echo # newline + exit + fi + if [ ! "$key" = "+" ] && [ ! "$key" = "-" ] && [ ! "$key" = "=" ] && [ ! "$key" = "_" ]; then # wrong key + reset # woops, reset it all... + continue + fi + if [ "$key" == "+" ] || [ "$key" == "=" ]; then + count=$((count+1)) + fi + if [ "$key" == "-" ] || [ "$key" == "_" ]; then + count=$((count-1)) + fi + if [ $count -eq $_min ]; then # min + count=$minimum + fi + if [ $count -eq $_max ]; then # max + count=$maximum + fi + + tnuoc=$((maximum-count)) # backwards count + #echo "count is: $count" + #echo "tnuoc is: $tnuoc" + count2=$((count * factor)) + tnuoc2=$((tnuoc * factor)) + left=`yes '>' | head -$count2 | paste -s -d '' -` + right=`yes ' ' | head -$tnuoc2 | paste -s -d '' -` + str="${left}${right}" + #echo "str is: $str" + echo -ne '\r' # backup + output $count # call function +done diff --git a/resources/virt.go b/resources/virt.go index b6418a7e..cd79c35e 100644 --- a/resources/virt.go +++ b/resources/virt.go @@ -31,7 +31,9 @@ import ( "github.com/purpleidea/mgmt/event" + multierr "github.com/hashicorp/go-multierror" "github.com/libvirt/libvirt-go" + libvirtxml "github.com/libvirt/libvirt-go-xml" errwrap "github.com/pkg/errors" ) @@ -39,6 +41,17 @@ func init() { gob.Register(&VirtRes{}) } +const ( + // DefaultMaxCPUs is the default number of possible cpu "slots" used. + DefaultMaxCPUs = 32 + + // MaxShutdownDelayTimeout is the max time we wait for a vm to shutdown. + MaxShutdownDelayTimeout = 60 * 5 // seconds + + // ShortPollInterval is how often we poll when expecting an event. + ShortPollInterval = 5 // seconds +) + var ( libvirtInitialized = false ) @@ -65,6 +78,7 @@ type VirtRes struct { State string `yaml:"state"` // running, paused, shutoff Transient bool `yaml:"transient"` // defined (false) or undefined (true) CPUs uint `yaml:"cpus"` + MaxCPUs uint `yaml:"maxcpus"` Memory uint64 `yaml:"memory"` // in KBytes OSInit string `yaml:"osinit"` // init used by lxc Boot []string `yaml:"boot"` // boot order. values: fd, hd, cdrom, network @@ -74,9 +88,19 @@ type VirtRes struct { Filesystem []filesystemDevice `yaml:"filesystem"` Auth *VirtAuth `yaml:"auth"` - conn *libvirt.Connect - absent bool // cached state - uriScheme virtURISchemeType + HotCPUs bool `yaml:"hotcpus"` // allow hotplug of cpus? + // FIXME: values here should be enum's! + RestartOnDiverge string `yaml:"restartondiverge"` // restart policy: "ignore", "ifneeded", "error" + RestartOnRefresh bool `yaml:"restartonrefresh"` // restart on refresh? + + conn *libvirt.Connect + version uint32 // major * 1000000 + minor * 1000 + release + absent bool // cached state + uriScheme virtURISchemeType + processExitWatch bool // do we want to wait on an explicit process exit? + processExitChan chan struct{} + restartScheduled bool // do we need to schedule a hard restart? + guestAgentConnected bool // our tracking of if guest agent is running } // Default returns some sensible defaults for this resource. @@ -85,9 +109,22 @@ func (obj *VirtRes) Default() Res { BaseRes: BaseRes{ MetaParams: DefaultMetaParams, // force a default }, + + MaxCPUs: DefaultMaxCPUs, + HotCPUs: true, // we're a dynamic engine, be dynamic by default! + + RestartOnDiverge: "error", // safest default :( } } +// Validate if the params passed in are valid data. +func (obj *VirtRes) Validate() error { + if obj.CPUs > obj.MaxCPUs { + return fmt.Errorf("the number of CPUs (%d) must not be greater than MaxCPUs (%d)", obj.CPUs, obj.MaxCPUs) + } + return obj.BaseRes.Validate() +} + // Init runs some startup code for this resource. func (obj *VirtRes) Init() error { if !libvirtInitialized { @@ -108,15 +145,71 @@ func (obj *VirtRes) Init() error { obj.absent = (obj.Transient && obj.State == "shutoff") // machine shouldn't exist + obj.conn, err = obj.connect() // gets closed in Close method of Res API + if err != nil { + return errwrap.Wrapf(err, "%s[%s]: Connection to libvirt failed in init", obj.Kind(), obj.GetName()) + } + + // check for hard to change properties + dom, err := obj.conn.LookupDomainByName(obj.GetName()) + if err == nil { + defer dom.Free() + } else if !isNotFound(err) { + return errwrap.Wrapf(err, "%s[%s]: Could not lookup on init", obj.Kind(), obj.GetName()) + } + + if err == nil { + // maxCPUs, err := dom.GetMaxVcpus() + i, err := dom.GetVcpusFlags(libvirt.DOMAIN_VCPU_MAXIMUM) + if err != nil { + return errwrap.Wrapf(err, "%s[%s]: Could not lookup MaxCPUs on init", obj.Kind(), obj.GetName()) + } + maxCPUs := uint(i) + if obj.MaxCPUs != maxCPUs { // max cpu slots is hard to change + // we'll need to reboot to fix this one... + obj.restartScheduled = true + } + + // parse running domain xml to read properties + // FIXME: should we do this in Watch, after we register the + // event handlers so that we don't miss any events via race? + xmlDesc, err := dom.GetXMLDesc(0) // 0 means no flags + if err != nil { + return errwrap.Wrapf(err, "%s[%s]: Could not GetXMLDesc on init", obj.Kind(), obj.GetName()) + } + domXML := &libvirtxml.Domain{} + if err := domXML.Unmarshal(xmlDesc); err != nil { + return errwrap.Wrapf(err, "%s[%s]: Could not unmarshal XML on init", obj.Kind(), obj.GetName()) + } + + // guest agent: domain->devices->channel->target->state == connected? + for _, x := range domXML.Devices.Channels { + if x.Target.Type == "virtio" && strings.HasPrefix(x.Target.Name, "org.qemu.guest_agent.") { + // last connection found wins (usually 1 anyways) + obj.guestAgentConnected = (x.Target.State == "connected") + } + } + } + obj.BaseRes.kind = "Virt" return obj.BaseRes.Init() // call base init, b/c we're overriding } -// Validate if the params passed in are valid data. -func (obj *VirtRes) Validate() error { - return obj.BaseRes.Validate() +// Close runs some cleanup code for this resource. +func (obj *VirtRes) Close() error { + // TODO: what is the first int Close return value useful for (if at all)? + _, err := obj.conn.Close() // close libvirt conn that was opened in Init + + // call base close, b/c we're overriding + if e := obj.BaseRes.Close(); err == nil { + err = e + } else if e != nil { + err = multierr.Append(err, e) // list of errors + } + return err } +// connect is the connect helper for the libvirt connection. It can handle auth. func (obj *VirtRes) connect() (conn *libvirt.Connect, err error) { if obj.Auth != nil { callback := func(creds []*libvirt.ConnectCredential) { @@ -139,21 +232,27 @@ func (obj *VirtRes) connect() (conn *libvirt.Connect, err error) { Callback: callback, } conn, err = libvirt.NewConnectWithAuth(obj.URI, auth, 0) + if err == nil { + if version, err := conn.GetLibVersion(); err == nil { + obj.version = version + } + } } if obj.Auth == nil || err != nil { conn, err = libvirt.NewConnect(obj.URI) + if err == nil { + if version, err := conn.GetLibVersion(); err == nil { + obj.version = version + } + } } return } // Watch is the primary listener for this resource and it outputs events. func (obj *VirtRes) Watch(processChan chan *event.Event) error { - conn, err := obj.connect() - if err != nil { - return fmt.Errorf("Connection to libvirt failed with: %s", err) - } - - eventChan := make(chan libvirt.DomainEventType) // TODO: do we need to buffer this? + domChan := make(chan libvirt.DomainEventType) // TODO: do we need to buffer this? + gaChan := make(chan *libvirt.DomainEventAgentLifecycle) errorChan := make(chan error) exitChan := make(chan struct{}) defer close(exitChan) @@ -181,17 +280,32 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error { } }() - callback := func(c *libvirt.Connect, d *libvirt.Domain, ev *libvirt.DomainEventLifecycle) { + // domain events callback + domCallback := func(c *libvirt.Connect, d *libvirt.Domain, ev *libvirt.DomainEventLifecycle) { domName, _ := d.GetName() if domName == obj.GetName() { - eventChan <- ev.Event + domChan <- ev.Event } } - callbackID, err := conn.DomainEventLifecycleRegister(nil, callback) + // if dom is nil, we get events for *all* domains! + domCallbackID, err := obj.conn.DomainEventLifecycleRegister(nil, domCallback) if err != nil { return err } - defer conn.DomainEventDeregister(callbackID) + defer obj.conn.DomainEventDeregister(domCallbackID) + + // guest agent events callback + gaCallback := func(c *libvirt.Connect, d *libvirt.Domain, eva *libvirt.DomainEventAgentLifecycle) { + domName, _ := d.GetName() + if domName == obj.GetName() { + gaChan <- eva + } + } + gaCallbackID, err := obj.conn.DomainEventAgentLifecycleRegister(nil, gaCallback) + if err != nil { + return err + } + defer obj.conn.DomainEventDeregister(gaCallbackID) // notify engine that we're running if err := obj.Running(processChan); err != nil { @@ -202,8 +316,9 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error { var exit *error // if ptr exists, that is the exit error to return for { + processExited := false // did the process exit fully (shutdown)? select { - case event := <-eventChan: + case event := <-domChan: // TODO: shouldn't we do these checks in CheckApply ? switch event { case libvirt.DOMAIN_EVENT_DEFINED: @@ -235,11 +350,43 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error { obj.StateOK(false) // dirty send = true } + processExited = true + case libvirt.DOMAIN_EVENT_PMSUSPENDED: + // FIXME: IIRC, in s3 we can't cold change + // hardware like cpus but in s4 it's okay? + // verify, detect and patch appropriately! fallthrough case libvirt.DOMAIN_EVENT_CRASHED: obj.StateOK(false) // dirty send = true + processExited = true // FIXME: is this okay for PMSUSPENDED ? + } + + if obj.processExitWatch && processExited { + close(obj.processExitChan) // send signal + obj.processExitWatch = false + } + + case agentEvent := <-gaChan: + state, reason := agentEvent.State, agentEvent.Reason + + if state == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_STATE_CONNECTED { + obj.guestAgentConnected = true + obj.StateOK(false) // dirty + send = true + log.Printf("%s[%s]: Guest agent connected", obj.Kind(), obj.GetName()) + + } else if state == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_STATE_DISCONNECTED { + obj.guestAgentConnected = false + // ignore CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_DOMAIN_STARTED + // events because they just tell you that guest agent channel was added + if reason == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_CHANNEL { + log.Printf("%s[%s]: Guest agent disconnected", obj.Kind(), obj.GetName()) + } + + } else { + return fmt.Errorf("Unknown %s[%s] guest agent state: %v", obj.Kind(), obj.GetName(), state) } case err := <-errorChan: @@ -258,51 +405,9 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error { } } -// attrCheckApply performs the CheckApply functions for CPU, Memory and others. -// This shouldn't be called when the machine is absent; it won't be found! -func (obj *VirtRes) attrCheckApply(apply bool) (bool, error) { - var checkOK = true - - dom, err := obj.conn.LookupDomainByName(obj.GetName()) - if err != nil { - return false, errwrap.Wrapf(err, "conn.LookupDomainByName failed") - } - - domInfo, err := dom.GetInfo() - if err != nil { - // we don't know if the state is ok - return false, errwrap.Wrapf(err, "domain.GetInfo failed") - } - - // check memory - if domInfo.Memory != obj.Memory { - checkOK = false - if !apply { - return false, nil - } - if err := dom.SetMemory(obj.Memory); err != nil { - return false, errwrap.Wrapf(err, "domain.SetMemory failed") - } - log.Printf("%s[%s]: Memory changed", obj.Kind(), obj.GetName()) - } - - // check cpus - if domInfo.NrVirtCpu != obj.CPUs { - checkOK = false - if !apply { - return false, nil - } - if err := dom.SetVcpus(obj.CPUs); err != nil { - return false, errwrap.Wrapf(err, "domain.SetVcpus failed") - } - log.Printf("%s[%s]: CPUs changed", obj.Kind(), obj.GetName()) - } - - return checkOK, nil -} - -// domainCreate creates a transient or persistent domain in the correct state. It -// doesn't check the state before hand, as it is a simple helper function. +// domainCreate creates a transient or persistent domain in the correct state. +// It doesn't check the state before hand, as it is a simple helper function. +// The caller must run dom.Free() after use, when error was returned as nil. func (obj *VirtRes) domainCreate() (*libvirt.Domain, bool, error) { if obj.Transient { @@ -350,83 +455,20 @@ func (obj *VirtRes) domainCreate() (*libvirt.Domain, bool, error) { return dom, false, nil } -// CheckApply checks the resource state and applies the resource if the bool -// input is true. It returns error info and if the state check passed or not. -func (obj *VirtRes) CheckApply(apply bool) (bool, error) { - var err error - obj.conn, err = obj.connect() - if err != nil { - return false, fmt.Errorf("Connection to libvirt failed with: %s", err) - } - +// stateCheckApply starts, stops, or pauses/unpauses the domain as needed. +func (obj *VirtRes) stateCheckApply(apply bool, dom *libvirt.Domain) (bool, error) { var checkOK = true - - dom, err := obj.conn.LookupDomainByName(obj.GetName()) - if err == nil { - // pass - } else if virErr, ok := err.(libvirt.Error); ok && virErr.Code == libvirt.ERR_NO_DOMAIN { - // domain not found - if obj.absent { - return true, nil - } - - if !apply { - return false, nil - } - - var c = true - dom, c, err = obj.domainCreate() // create the domain - if err != nil { - return false, errwrap.Wrapf(err, "domainCreate failed") - } else if !c { - checkOK = false - } - - } else { - return false, errwrap.Wrapf(err, "LookupDomainByName failed") - } - defer dom.Free() - // domain exists - domInfo, err := dom.GetInfo() if err != nil { // we don't know if the state is ok return false, errwrap.Wrapf(err, "domain.GetInfo failed") } - isPersistent, err := dom.IsPersistent() - if err != nil { - // we don't know if the state is ok - return false, errwrap.Wrapf(err, "domain.IsPersistent failed") - } isActive, err := dom.IsActive() if err != nil { // we don't know if the state is ok return false, errwrap.Wrapf(err, "domain.IsActive failed") } - // check for persistence - if isPersistent == obj.Transient { // if they're different! - if !apply { - return false, nil - } - if isPersistent { - if err := dom.Undefine(); err != nil { - return false, errwrap.Wrapf(err, "domain.Undefine failed") - } - log.Printf("%s[%s]: Domain undefined", obj.Kind(), obj.GetName()) - } else { - domXML, err := dom.GetXMLDesc(libvirt.DOMAIN_XML_INACTIVE) - if err != nil { - return false, errwrap.Wrapf(err, "domain.GetXMLDesc failed") - } - if _, err = obj.conn.DomainDefineXML(domXML); err != nil { - return false, errwrap.Wrapf(err, "conn.DomainDefineXML failed") - } - log.Printf("%s[%s]: Domain defined", obj.Kind(), obj.GetName()) - } - checkOK = false - } - // check for valid state switch obj.State { case "running": @@ -490,6 +532,283 @@ func (obj *VirtRes) CheckApply(apply bool) (bool, error) { log.Printf("%s[%s]: Domain destroyed", obj.Kind(), obj.GetName()) } + return checkOK, nil +} + +// attrCheckApply performs the CheckApply functions for CPU, Memory and others. +// This shouldn't be called when the machine is absent; it won't be found! +func (obj *VirtRes) attrCheckApply(apply bool, dom *libvirt.Domain) (bool, error) { + var checkOK = true + domInfo, err := dom.GetInfo() + if err != nil { + // we don't know if the state is ok + return false, errwrap.Wrapf(err, "domain.GetInfo failed") + } + + // check (balloon) memory + // FIXME: check that we don't increase past max memory... + if domInfo.Memory != obj.Memory { + if !apply { + return false, nil + } + checkOK = false + if err := dom.SetMemory(obj.Memory); err != nil { + return false, errwrap.Wrapf(err, "domain.SetMemory failed") + } + log.Printf("%s[%s]: Memory changed to %d", obj.Kind(), obj.GetName(), obj.Memory) + } + + // check cpus + if domInfo.NrVirtCpu != obj.CPUs { + if !apply { + return false, nil + } + + // unused: DOMAIN_VCPU_CURRENT + switch domInfo.State { + case libvirt.DOMAIN_PAUSED: + // we can queue up the SetVcpus operation, + // which will be seen once vm is unpaused! + fallthrough + case libvirt.DOMAIN_RUNNING: + // cpu hot*un*plug introduced in 2.2.0 + // 2 * 1000000 + 2 * 1000 + 0 = 2002000 + if obj.HotCPUs && obj.version < 2002000 && domInfo.NrVirtCpu > obj.CPUs { + return false, fmt.Errorf("libvirt 2.2.0 or greater is required to hotunplug cpus") + } + // pkrempa says HOTPLUGGABLE is implied when doing LIVE + // on running machine, but we add it anyways in case we + // race and the machine is in shutoff state. We need to + // specify HOTPLUGGABLE if we add while machine is off! + // We particularly need to add HOTPLUGGABLE with CONFIG + flags := libvirt.DOMAIN_VCPU_LIVE + if !obj.Transient { + flags |= libvirt.DOMAIN_VCPU_CONFIG + // hotpluggable flag introduced in 2.4.0 + // 2 * 1000000 + 4 * 1000 + 0 = 2004000 + if obj.version >= 2004000 { + flags |= libvirt.DOMAIN_VCPU_HOTPLUGGABLE + } + } + if err := dom.SetVcpusFlags(obj.CPUs, flags); err != nil { + return false, errwrap.Wrapf(err, "domain.SetVcpus failed") + } + checkOK = false + log.Printf("%s[%s]: CPUs (hot) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs) + + case libvirt.DOMAIN_SHUTOFF, libvirt.DOMAIN_SHUTDOWN: + if !obj.Transient { + flags := libvirt.DOMAIN_VCPU_CONFIG + if obj.version >= 2004000 { + flags |= libvirt.DOMAIN_VCPU_HOTPLUGGABLE + } + if err := dom.SetVcpusFlags(obj.CPUs, flags); err != nil { + return false, errwrap.Wrapf(err, "domain.SetVcpus failed") + } + checkOK = false + log.Printf("%s[%s]: CPUs (cold) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs) + } + + default: + // FIXME: is this accurate? + return false, fmt.Errorf("can't modify cpus when in %v", domInfo.State) + } + } + + // modify the online aspect of the cpus with qemu-guest-agent + if obj.HotCPUs && obj.guestAgentConnected && domInfo.State != libvirt.DOMAIN_PAUSED { + + // if hotplugging a cpu without the guest agent, you might need: + // manually to: echo 1 > /sys/devices/system/cpu/cpu1/online OR + // udev (untested) in: /etc/udev/rules.d/99-hotplugCPU.rules + // SUBSYSTEM=="cpu",ACTION=="add",RUN+="/bin/sh -c '[ ! -e /sys$devpath/online ] || echo 1 > /sys$devpath/online'" + + // how many online cpus are there? + i, err := dom.GetVcpusFlags(libvirt.DOMAIN_VCPU_GUEST) + if err != nil { + return false, errwrap.Wrapf(err, "domain.GetVcpus failed from qemu-guest-agent") + } + onlineCPUs := uint(i) + if onlineCPUs != obj.CPUs { + if !apply { + return false, nil + } + if err := dom.SetVcpusFlags(obj.CPUs, libvirt.DOMAIN_VCPU_GUEST); err != nil { + return false, errwrap.Wrapf(err, "domain.SetVcpus failed") + } + checkOK = false + log.Printf("%s[%s]: CPUs (guest) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs) + } + } + + return checkOK, nil +} + +// domainShutdownSync powers off a domain in a manner which will allow hardware +// to be changed while off. This requires the process to exit so that when it's +// called again, qemu can start up fresh as if we cold swapped in new hardware! +// This method is particularly special because it waits for shutdown to finish. +func (obj *VirtRes) domainShutdownSync(apply bool, dom *libvirt.Domain) (bool, error) { + // we need to wait for shutdown to be finished before we can restart it + once := true + timeout := time.After(time.Duration(MaxShutdownDelayTimeout) * time.Second) + + // wait until shutdown completion... + for { + domInfo, err := dom.GetInfo() + if err != nil { + // we don't know if the state is ok + return false, errwrap.Wrapf(err, "domain.GetInfo failed") + } + if domInfo.State == libvirt.DOMAIN_SHUTOFF || domInfo.State == libvirt.DOMAIN_SHUTDOWN { + log.Printf("%s[%s]: Shutdown", obj.Kind(), obj.GetName()) + break + } + + if once { + if !apply { + return false, nil + } + obj.processExitWatch = true + obj.processExitChan = make(chan struct{}) + // if machine shuts down before we call this, we error; + // this isn't ideal, but it happened due to user error! + log.Printf("%s[%s]: Running shutdown", obj.Kind(), obj.GetName()) + if err := dom.Shutdown(); err != nil { + // FIXME: if machine is already shutdown completely, return early + return false, errwrap.Wrapf(err, "domain.Shutdown failed") + } + once = false // we did some work! + } + + select { + case <-obj.processExitChan: // should get a close signal... + // pass + case <-time.After(time.Duration(ShortPollInterval) * time.Second): + // poll until timeout in case no event ever arrives + // this happens when using Meta().Poll for example! + + // FIXME: some domains can reboot when asked to shutdown + // via the `on_poweroff` xml setting. in this case, we + // might only exit from here via timeout... avoid this! + // https://libvirt.org/formatdomain.html#elementsEvents + continue + case <-timeout: + return false, fmt.Errorf("%s[%s]: didn't shutdown after %d seconds", obj.Kind(), obj.GetName(), MaxShutdownDelayTimeout) + } + } + + return once, nil +} + +// CheckApply checks the resource state and applies the resource if the bool +// input is true. It returns error info and if the state check passed or not. +func (obj *VirtRes) CheckApply(apply bool) (bool, error) { + // if we do the restart, we must flip the flag back to false as evidence + var restart bool // do we need to do a restart? + if obj.RestartOnRefresh && obj.Refresh() { // a refresh is a restart ask + restart = true + } + + // we need to restart in all situations except ignore. the "error" case + // means that if a restart is actually needed, we should return an error + if obj.restartScheduled && obj.RestartOnDiverge != "ignore" { // "ignore", "ifneeded", "error" + restart = true + } + if !apply { + restart = false + } + + var checkOK = true + + dom, err := obj.conn.LookupDomainByName(obj.GetName()) + if err == nil { + // pass + } else if isNotFound(err) { + // domain not found + if obj.absent { + // we can ignore the restart var since we're not running + return true, nil + } + + if !apply { + return false, nil + } + + var c = true + dom, c, err = obj.domainCreate() // create the domain + if err != nil { + return false, errwrap.Wrapf(err, "domainCreate failed") + } else if !c { + checkOK = false + } + + } else { + return false, errwrap.Wrapf(err, "LookupDomainByName failed") + } + defer dom.Free() // the Free() for two possible domain objects above + // domain now exists + + isPersistent, err := dom.IsPersistent() + if err != nil { + // we don't know if the state is ok + return false, errwrap.Wrapf(err, "domain.IsPersistent failed") + } + // check for persistence + if isPersistent == obj.Transient { // if they're different! + if !apply { + return false, nil + } + if isPersistent { + if err := dom.Undefine(); err != nil { + return false, errwrap.Wrapf(err, "domain.Undefine failed") + } + log.Printf("%s[%s]: Domain undefined", obj.Kind(), obj.GetName()) + } else { + domXML, err := dom.GetXMLDesc(libvirt.DOMAIN_XML_INACTIVE) + if err != nil { + return false, errwrap.Wrapf(err, "domain.GetXMLDesc failed") + } + if _, err = obj.conn.DomainDefineXML(domXML); err != nil { + return false, errwrap.Wrapf(err, "conn.DomainDefineXML failed") + } + log.Printf("%s[%s]: Domain defined", obj.Kind(), obj.GetName()) + } + checkOK = false + } + + // shutdown here and let the stateCheckApply fix things up... + // TODO: i think this is the most straight forward process... + if !obj.absent && restart { + if c, err := obj.domainShutdownSync(apply, dom); err != nil { + return false, errwrap.Wrapf(err, "domainShutdownSync failed") + } else if !c { + checkOK = false + restart = false // clear the restart requirement... + } + } + + // FIXME: is doing this early check (therefore twice total) a good idea? + // run additional pre-emptive attr change checks here for hotplug stuff! + if !obj.absent { + if c, err := obj.attrCheckApply(apply, dom); err != nil { + return false, errwrap.Wrapf(err, "early attrCheckApply failed") + } else if !c { + checkOK = false + } + } + // TODO: do we need to run again below after we've booted up the domain? + + // apply correct machine state, eg: startup/shutoff/pause as needed + if c, err := obj.stateCheckApply(apply, dom); err != nil { + return false, errwrap.Wrapf(err, "stateCheckApply failed") + } else if !c { + checkOK = false + } + + // FIXME: should we wait to ensure machine is booted before continuing? + // it may be useful to wait for guest agent to hotplug some ram or cpu! + if !apply { return false, nil } @@ -497,17 +816,22 @@ func (obj *VirtRes) CheckApply(apply bool) (bool, error) { // mem & cpu checks... if !obj.absent { - if c, err := obj.attrCheckApply(apply); err != nil { + if c, err := obj.attrCheckApply(apply, dom); err != nil { return false, errwrap.Wrapf(err, "attrCheckApply failed") } else if !c { checkOK = false } } + // we had to do a restart, we didn't, and we should error if it was needed + if obj.restartScheduled && restart == true && obj.RestartOnDiverge == "error" { + return false, fmt.Errorf("%s[%s]: needed restart but didn't! (RestartOnDiverge: %v)", obj.Kind(), obj.GetName(), obj.RestartOnDiverge) + } + return checkOK, nil // w00t } -// Return the correct domain type based on the uri +// getDomainType returns the correct domain type based on the uri. func (obj VirtRes) getDomainType() string { switch obj.uriScheme { case lxcURI: @@ -517,7 +841,7 @@ func (obj VirtRes) getDomainType() string { } } -// Return the correct os type based on the uri +// getOSType returns the correct os type based on the uri. func (obj VirtRes) getOSType() string { switch obj.uriScheme { case lxcURI: @@ -536,13 +860,30 @@ func (obj VirtRes) getOSInit() string { } } +// getDomainXML returns the representative XML for a domain struct. +// FIXME: replace this with the libvirt-go-xml package instead! func (obj *VirtRes) getDomainXML() string { var b string b += obj.getDomainType() // start domain b += fmt.Sprintf("%s", obj.GetName()) b += fmt.Sprintf("%d", obj.Memory) - b += fmt.Sprintf("%d", obj.CPUs) + + if obj.HotCPUs { + b += fmt.Sprintf("%d", obj.CPUs, obj.MaxCPUs) + b += fmt.Sprintf("") + b += fmt.Sprintf("") // zeroth cpu can't change + for i := uint(1); i < obj.MaxCPUs; i++ { // skip first entry + enabled := "no" + if i < obj.CPUs { + enabled = "yes" + } + b += fmt.Sprintf("", i, enabled) + } + b += fmt.Sprintf("") + } else { + b += fmt.Sprintf("%d", obj.CPUs) + } b += "" b += obj.getOSType() @@ -554,6 +895,13 @@ func (obj *VirtRes) getDomainXML() string { } b += fmt.Sprintf("") + if obj.HotCPUs { + // acpi is needed for cpu hotplug support + b += fmt.Sprintf("") + b += fmt.Sprintf("") + b += fmt.Sprintf("") + } + b += fmt.Sprintf("") // start devices if obj.Disk != nil { @@ -580,6 +928,16 @@ func (obj *VirtRes) getDomainXML() string { } } + // qemu guest agent is not *required* for hotplugging cpus but + // it helps because it can ask the host to make them online... + if obj.HotCPUs { + // enable qemu guest agent (on the host side) + b += fmt.Sprintf("") + b += fmt.Sprintf("") + b += fmt.Sprintf("") + b += fmt.Sprintf("") + } + b += "" b += "" b += "" // end devices @@ -718,6 +1076,12 @@ func (obj *VirtRes) Compare(res Res) bool { if obj.CPUs != res.CPUs { return false } + // we can't change this property while machine is running! + // we do need to return false, so that a new struct gets built, + // which will cause at least one Init() & CheckApply() to run. + if obj.MaxCPUs != res.MaxCPUs { + return false + } // TODO: can we skip the compare of certain properties such as // Memory because this object (but with different memory) can be // *converted* into the new version that has more/less memory? @@ -748,10 +1112,6 @@ func (obj *VirtRes) Compare(res Res) bool { return true } -// CollectPattern applies the pattern for collection resources. -func (obj *VirtRes) CollectPattern(string) { -} - // UnmarshalYAML is the custom unmarshal handler for this struct. // It is primarily useful for setting the defaults. func (obj *VirtRes) UnmarshalYAML(unmarshal func(interface{}) error) error { @@ -781,6 +1141,18 @@ func randMAC() string { fmt.Sprintf(":%x", rand.Intn(255)) } +// isNotFound tells us if this is a domain not found error. +func isNotFound(err error) bool { + if err == nil { + return false + } + if virErr, ok := err.(libvirt.Error); ok && virErr.Code == libvirt.ERR_NO_DOMAIN { + // domain not found + return true + } + return false // some other error +} + // expandHome does a simple expansion of the tilde into your $HOME value. func expandHome(p string) (string, error) { // TODO: this doesn't match strings of the form: ~james/... diff --git a/test/test-bashfmt.sh b/test/test-bashfmt.sh index 6f378e4c..39944f7b 100755 --- a/test/test-bashfmt.sh +++ b/test/test-bashfmt.sh @@ -14,7 +14,7 @@ ROOT=$(dirname "${BASH_SOURCE}")/.. cd "${ROOT}" find_files() { - git ls-files | grep -e '\.sh$' -e '\.bash$' + git ls-files | grep -e '\.sh$' -e '\.bash$' | grep -v 'misc/delta-cpu.sh' } bad_files=$(