diff --git a/docs/resource-guide.md b/docs/resource-guide.md
index d961da62..fdc21060 100644
--- a/docs/resource-guide.md
+++ b/docs/resource-guide.md
@@ -108,10 +108,15 @@ opened in the `Init` method and were using throughout the resource.
```golang
// Close runs some cleanup code for this resource.
func (obj *FooRes) Close() error {
+ err := obj.conn.Close() // close some internal connection
- obj.Conn.Close() // ignore error in this case
-
- return obj.BaseRes.Close() // call base close, b/c we're overriding
+ // call base close, b/c we're overriding
+ if e := obj.BaseRes.Close(); err == nil {
+ err = e
+ } else if e != nil {
+ err = multierr.Append(err, e) // list of errors
+ }
+ return err
}
```
diff --git a/examples/virt4.yaml b/examples/virt4.yaml
index efa5fb03..ca3b0752 100644
--- a/examples/virt4.yaml
+++ b/examples/virt4.yaml
@@ -3,7 +3,13 @@ graph: mygraph
resources:
virt:
- name: mgmt4
+ meta:
+ limit: .inf
+ burst: 0
uri: 'qemu:///session'
+ cpus: 1
+ maxcpus: 4
+ memory: 524288
boot:
- hd
disk:
diff --git a/misc/delta-cpu.sh b/misc/delta-cpu.sh
new file mode 100755
index 00000000..30fd2435
--- /dev/null
+++ b/misc/delta-cpu.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# shitty cpu count control, useful for live demos
+
+minimum=1 # don't decrease below this number of cpus
+maximum=8 # don't increase above this number of cpus
+count=1 # initial count
+factor=3
+function output() {
+count=$1 # arg!
+cat << EOF > ~/code/mgmt/examples/virt4.yaml
+---
+graph: mygraph
+resources:
+ virt:
+ - name: mgmt4
+ meta:
+ limit: .inf
+ burst: 0
+ uri: 'qemu:///session'
+ cpus: $count
+ maxcpus: $maximum
+ memory: 524288
+ boot:
+ - hd
+ disk:
+ - type: qcow2
+ source: "~/.local/share/libvirt/images/fedora-23-scratch.qcow2"
+ state: running
+ transient: false
+edges: []
+comment: "qemu-img create -b fedora-23.qcow2 -f qcow2 fedora-23-scratch.qcow2"
+EOF
+}
+#tput cuu 1 && tput el # remove last line
+str=''
+tnuoc=$((maximum-count)) # backwards count
+count2=$((count * factor))
+tnuoc2=$((tnuoc * factor))
+left=`yes '>' | head -$count2 | paste -s -d '' -`
+right=`yes ' ' | head -$tnuoc2 | paste -s -d '' -`
+str="${left}${right}"
+_min=$((minimum-1))
+_max=$((maximum+1))
+reset # clean up once...
+output $count # call function
+while true; do
+
+ read -n1 -r -s -p "CPUs count is: $count; ${str} Press +/- key to adjust." key
+ if [ "$key" = "q" ] || [ "$key" = "Q" ]; then
+ echo # newline
+ exit
+ fi
+ if [ ! "$key" = "+" ] && [ ! "$key" = "-" ] && [ ! "$key" = "=" ] && [ ! "$key" = "_" ]; then # wrong key
+ reset # woops, reset it all...
+ continue
+ fi
+ if [ "$key" == "+" ] || [ "$key" == "=" ]; then
+ count=$((count+1))
+ fi
+ if [ "$key" == "-" ] || [ "$key" == "_" ]; then
+ count=$((count-1))
+ fi
+ if [ $count -eq $_min ]; then # min
+ count=$minimum
+ fi
+ if [ $count -eq $_max ]; then # max
+ count=$maximum
+ fi
+
+ tnuoc=$((maximum-count)) # backwards count
+ #echo "count is: $count"
+ #echo "tnuoc is: $tnuoc"
+ count2=$((count * factor))
+ tnuoc2=$((tnuoc * factor))
+ left=`yes '>' | head -$count2 | paste -s -d '' -`
+ right=`yes ' ' | head -$tnuoc2 | paste -s -d '' -`
+ str="${left}${right}"
+ #echo "str is: $str"
+ echo -ne '\r' # backup
+ output $count # call function
+done
diff --git a/resources/virt.go b/resources/virt.go
index b6418a7e..cd79c35e 100644
--- a/resources/virt.go
+++ b/resources/virt.go
@@ -31,7 +31,9 @@ import (
"github.com/purpleidea/mgmt/event"
+ multierr "github.com/hashicorp/go-multierror"
"github.com/libvirt/libvirt-go"
+ libvirtxml "github.com/libvirt/libvirt-go-xml"
errwrap "github.com/pkg/errors"
)
@@ -39,6 +41,17 @@ func init() {
gob.Register(&VirtRes{})
}
+const (
+ // DefaultMaxCPUs is the default number of possible cpu "slots" used.
+ DefaultMaxCPUs = 32
+
+ // MaxShutdownDelayTimeout is the max time we wait for a vm to shutdown.
+ MaxShutdownDelayTimeout = 60 * 5 // seconds
+
+ // ShortPollInterval is how often we poll when expecting an event.
+ ShortPollInterval = 5 // seconds
+)
+
var (
libvirtInitialized = false
)
@@ -65,6 +78,7 @@ type VirtRes struct {
State string `yaml:"state"` // running, paused, shutoff
Transient bool `yaml:"transient"` // defined (false) or undefined (true)
CPUs uint `yaml:"cpus"`
+ MaxCPUs uint `yaml:"maxcpus"`
Memory uint64 `yaml:"memory"` // in KBytes
OSInit string `yaml:"osinit"` // init used by lxc
Boot []string `yaml:"boot"` // boot order. values: fd, hd, cdrom, network
@@ -74,9 +88,19 @@ type VirtRes struct {
Filesystem []filesystemDevice `yaml:"filesystem"`
Auth *VirtAuth `yaml:"auth"`
- conn *libvirt.Connect
- absent bool // cached state
- uriScheme virtURISchemeType
+ HotCPUs bool `yaml:"hotcpus"` // allow hotplug of cpus?
+ // FIXME: values here should be enum's!
+ RestartOnDiverge string `yaml:"restartondiverge"` // restart policy: "ignore", "ifneeded", "error"
+ RestartOnRefresh bool `yaml:"restartonrefresh"` // restart on refresh?
+
+ conn *libvirt.Connect
+ version uint32 // major * 1000000 + minor * 1000 + release
+ absent bool // cached state
+ uriScheme virtURISchemeType
+ processExitWatch bool // do we want to wait on an explicit process exit?
+ processExitChan chan struct{}
+ restartScheduled bool // do we need to schedule a hard restart?
+ guestAgentConnected bool // our tracking of if guest agent is running
}
// Default returns some sensible defaults for this resource.
@@ -85,9 +109,22 @@ func (obj *VirtRes) Default() Res {
BaseRes: BaseRes{
MetaParams: DefaultMetaParams, // force a default
},
+
+ MaxCPUs: DefaultMaxCPUs,
+ HotCPUs: true, // we're a dynamic engine, be dynamic by default!
+
+ RestartOnDiverge: "error", // safest default :(
}
}
+// Validate if the params passed in are valid data.
+func (obj *VirtRes) Validate() error {
+ if obj.CPUs > obj.MaxCPUs {
+ return fmt.Errorf("the number of CPUs (%d) must not be greater than MaxCPUs (%d)", obj.CPUs, obj.MaxCPUs)
+ }
+ return obj.BaseRes.Validate()
+}
+
// Init runs some startup code for this resource.
func (obj *VirtRes) Init() error {
if !libvirtInitialized {
@@ -108,15 +145,71 @@ func (obj *VirtRes) Init() error {
obj.absent = (obj.Transient && obj.State == "shutoff") // machine shouldn't exist
+ obj.conn, err = obj.connect() // gets closed in Close method of Res API
+ if err != nil {
+ return errwrap.Wrapf(err, "%s[%s]: Connection to libvirt failed in init", obj.Kind(), obj.GetName())
+ }
+
+ // check for hard to change properties
+ dom, err := obj.conn.LookupDomainByName(obj.GetName())
+ if err == nil {
+ defer dom.Free()
+ } else if !isNotFound(err) {
+ return errwrap.Wrapf(err, "%s[%s]: Could not lookup on init", obj.Kind(), obj.GetName())
+ }
+
+ if err == nil {
+ // maxCPUs, err := dom.GetMaxVcpus()
+ i, err := dom.GetVcpusFlags(libvirt.DOMAIN_VCPU_MAXIMUM)
+ if err != nil {
+ return errwrap.Wrapf(err, "%s[%s]: Could not lookup MaxCPUs on init", obj.Kind(), obj.GetName())
+ }
+ maxCPUs := uint(i)
+ if obj.MaxCPUs != maxCPUs { // max cpu slots is hard to change
+ // we'll need to reboot to fix this one...
+ obj.restartScheduled = true
+ }
+
+ // parse running domain xml to read properties
+ // FIXME: should we do this in Watch, after we register the
+ // event handlers so that we don't miss any events via race?
+ xmlDesc, err := dom.GetXMLDesc(0) // 0 means no flags
+ if err != nil {
+ return errwrap.Wrapf(err, "%s[%s]: Could not GetXMLDesc on init", obj.Kind(), obj.GetName())
+ }
+ domXML := &libvirtxml.Domain{}
+ if err := domXML.Unmarshal(xmlDesc); err != nil {
+ return errwrap.Wrapf(err, "%s[%s]: Could not unmarshal XML on init", obj.Kind(), obj.GetName())
+ }
+
+ // guest agent: domain->devices->channel->target->state == connected?
+ for _, x := range domXML.Devices.Channels {
+ if x.Target.Type == "virtio" && strings.HasPrefix(x.Target.Name, "org.qemu.guest_agent.") {
+ // last connection found wins (usually 1 anyways)
+ obj.guestAgentConnected = (x.Target.State == "connected")
+ }
+ }
+ }
+
obj.BaseRes.kind = "Virt"
return obj.BaseRes.Init() // call base init, b/c we're overriding
}
-// Validate if the params passed in are valid data.
-func (obj *VirtRes) Validate() error {
- return obj.BaseRes.Validate()
+// Close runs some cleanup code for this resource.
+func (obj *VirtRes) Close() error {
+ // TODO: what is the first int Close return value useful for (if at all)?
+ _, err := obj.conn.Close() // close libvirt conn that was opened in Init
+
+ // call base close, b/c we're overriding
+ if e := obj.BaseRes.Close(); err == nil {
+ err = e
+ } else if e != nil {
+ err = multierr.Append(err, e) // list of errors
+ }
+ return err
}
+// connect is the connect helper for the libvirt connection. It can handle auth.
func (obj *VirtRes) connect() (conn *libvirt.Connect, err error) {
if obj.Auth != nil {
callback := func(creds []*libvirt.ConnectCredential) {
@@ -139,21 +232,27 @@ func (obj *VirtRes) connect() (conn *libvirt.Connect, err error) {
Callback: callback,
}
conn, err = libvirt.NewConnectWithAuth(obj.URI, auth, 0)
+ if err == nil {
+ if version, err := conn.GetLibVersion(); err == nil {
+ obj.version = version
+ }
+ }
}
if obj.Auth == nil || err != nil {
conn, err = libvirt.NewConnect(obj.URI)
+ if err == nil {
+ if version, err := conn.GetLibVersion(); err == nil {
+ obj.version = version
+ }
+ }
}
return
}
// Watch is the primary listener for this resource and it outputs events.
func (obj *VirtRes) Watch(processChan chan *event.Event) error {
- conn, err := obj.connect()
- if err != nil {
- return fmt.Errorf("Connection to libvirt failed with: %s", err)
- }
-
- eventChan := make(chan libvirt.DomainEventType) // TODO: do we need to buffer this?
+ domChan := make(chan libvirt.DomainEventType) // TODO: do we need to buffer this?
+ gaChan := make(chan *libvirt.DomainEventAgentLifecycle)
errorChan := make(chan error)
exitChan := make(chan struct{})
defer close(exitChan)
@@ -181,17 +280,32 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error {
}
}()
- callback := func(c *libvirt.Connect, d *libvirt.Domain, ev *libvirt.DomainEventLifecycle) {
+ // domain events callback
+ domCallback := func(c *libvirt.Connect, d *libvirt.Domain, ev *libvirt.DomainEventLifecycle) {
domName, _ := d.GetName()
if domName == obj.GetName() {
- eventChan <- ev.Event
+ domChan <- ev.Event
}
}
- callbackID, err := conn.DomainEventLifecycleRegister(nil, callback)
+ // if dom is nil, we get events for *all* domains!
+ domCallbackID, err := obj.conn.DomainEventLifecycleRegister(nil, domCallback)
if err != nil {
return err
}
- defer conn.DomainEventDeregister(callbackID)
+ defer obj.conn.DomainEventDeregister(domCallbackID)
+
+ // guest agent events callback
+ gaCallback := func(c *libvirt.Connect, d *libvirt.Domain, eva *libvirt.DomainEventAgentLifecycle) {
+ domName, _ := d.GetName()
+ if domName == obj.GetName() {
+ gaChan <- eva
+ }
+ }
+ gaCallbackID, err := obj.conn.DomainEventAgentLifecycleRegister(nil, gaCallback)
+ if err != nil {
+ return err
+ }
+ defer obj.conn.DomainEventDeregister(gaCallbackID)
// notify engine that we're running
if err := obj.Running(processChan); err != nil {
@@ -202,8 +316,9 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error {
var exit *error // if ptr exists, that is the exit error to return
for {
+ processExited := false // did the process exit fully (shutdown)?
select {
- case event := <-eventChan:
+ case event := <-domChan:
// TODO: shouldn't we do these checks in CheckApply ?
switch event {
case libvirt.DOMAIN_EVENT_DEFINED:
@@ -235,11 +350,43 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error {
obj.StateOK(false) // dirty
send = true
}
+ processExited = true
+
case libvirt.DOMAIN_EVENT_PMSUSPENDED:
+ // FIXME: IIRC, in s3 we can't cold change
+ // hardware like cpus but in s4 it's okay?
+ // verify, detect and patch appropriately!
fallthrough
case libvirt.DOMAIN_EVENT_CRASHED:
obj.StateOK(false) // dirty
send = true
+ processExited = true // FIXME: is this okay for PMSUSPENDED ?
+ }
+
+ if obj.processExitWatch && processExited {
+ close(obj.processExitChan) // send signal
+ obj.processExitWatch = false
+ }
+
+ case agentEvent := <-gaChan:
+ state, reason := agentEvent.State, agentEvent.Reason
+
+ if state == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_STATE_CONNECTED {
+ obj.guestAgentConnected = true
+ obj.StateOK(false) // dirty
+ send = true
+ log.Printf("%s[%s]: Guest agent connected", obj.Kind(), obj.GetName())
+
+ } else if state == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_STATE_DISCONNECTED {
+ obj.guestAgentConnected = false
+ // ignore CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_DOMAIN_STARTED
+ // events because they just tell you that guest agent channel was added
+ if reason == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_CHANNEL {
+ log.Printf("%s[%s]: Guest agent disconnected", obj.Kind(), obj.GetName())
+ }
+
+ } else {
+ return fmt.Errorf("Unknown %s[%s] guest agent state: %v", obj.Kind(), obj.GetName(), state)
}
case err := <-errorChan:
@@ -258,51 +405,9 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error {
}
}
-// attrCheckApply performs the CheckApply functions for CPU, Memory and others.
-// This shouldn't be called when the machine is absent; it won't be found!
-func (obj *VirtRes) attrCheckApply(apply bool) (bool, error) {
- var checkOK = true
-
- dom, err := obj.conn.LookupDomainByName(obj.GetName())
- if err != nil {
- return false, errwrap.Wrapf(err, "conn.LookupDomainByName failed")
- }
-
- domInfo, err := dom.GetInfo()
- if err != nil {
- // we don't know if the state is ok
- return false, errwrap.Wrapf(err, "domain.GetInfo failed")
- }
-
- // check memory
- if domInfo.Memory != obj.Memory {
- checkOK = false
- if !apply {
- return false, nil
- }
- if err := dom.SetMemory(obj.Memory); err != nil {
- return false, errwrap.Wrapf(err, "domain.SetMemory failed")
- }
- log.Printf("%s[%s]: Memory changed", obj.Kind(), obj.GetName())
- }
-
- // check cpus
- if domInfo.NrVirtCpu != obj.CPUs {
- checkOK = false
- if !apply {
- return false, nil
- }
- if err := dom.SetVcpus(obj.CPUs); err != nil {
- return false, errwrap.Wrapf(err, "domain.SetVcpus failed")
- }
- log.Printf("%s[%s]: CPUs changed", obj.Kind(), obj.GetName())
- }
-
- return checkOK, nil
-}
-
-// domainCreate creates a transient or persistent domain in the correct state. It
-// doesn't check the state before hand, as it is a simple helper function.
+// domainCreate creates a transient or persistent domain in the correct state.
+// It doesn't check the state before hand, as it is a simple helper function.
+// The caller must run dom.Free() after use, when error was returned as nil.
func (obj *VirtRes) domainCreate() (*libvirt.Domain, bool, error) {
if obj.Transient {
@@ -350,83 +455,20 @@ func (obj *VirtRes) domainCreate() (*libvirt.Domain, bool, error) {
return dom, false, nil
}
-// CheckApply checks the resource state and applies the resource if the bool
-// input is true. It returns error info and if the state check passed or not.
-func (obj *VirtRes) CheckApply(apply bool) (bool, error) {
- var err error
- obj.conn, err = obj.connect()
- if err != nil {
- return false, fmt.Errorf("Connection to libvirt failed with: %s", err)
- }
-
+// stateCheckApply starts, stops, or pauses/unpauses the domain as needed.
+func (obj *VirtRes) stateCheckApply(apply bool, dom *libvirt.Domain) (bool, error) {
var checkOK = true
-
- dom, err := obj.conn.LookupDomainByName(obj.GetName())
- if err == nil {
- // pass
- } else if virErr, ok := err.(libvirt.Error); ok && virErr.Code == libvirt.ERR_NO_DOMAIN {
- // domain not found
- if obj.absent {
- return true, nil
- }
-
- if !apply {
- return false, nil
- }
-
- var c = true
- dom, c, err = obj.domainCreate() // create the domain
- if err != nil {
- return false, errwrap.Wrapf(err, "domainCreate failed")
- } else if !c {
- checkOK = false
- }
-
- } else {
- return false, errwrap.Wrapf(err, "LookupDomainByName failed")
- }
- defer dom.Free()
- // domain exists
-
domInfo, err := dom.GetInfo()
if err != nil {
// we don't know if the state is ok
return false, errwrap.Wrapf(err, "domain.GetInfo failed")
}
- isPersistent, err := dom.IsPersistent()
- if err != nil {
- // we don't know if the state is ok
- return false, errwrap.Wrapf(err, "domain.IsPersistent failed")
- }
isActive, err := dom.IsActive()
if err != nil {
// we don't know if the state is ok
return false, errwrap.Wrapf(err, "domain.IsActive failed")
}
- // check for persistence
- if isPersistent == obj.Transient { // if they're different!
- if !apply {
- return false, nil
- }
- if isPersistent {
- if err := dom.Undefine(); err != nil {
- return false, errwrap.Wrapf(err, "domain.Undefine failed")
- }
- log.Printf("%s[%s]: Domain undefined", obj.Kind(), obj.GetName())
- } else {
- domXML, err := dom.GetXMLDesc(libvirt.DOMAIN_XML_INACTIVE)
- if err != nil {
- return false, errwrap.Wrapf(err, "domain.GetXMLDesc failed")
- }
- if _, err = obj.conn.DomainDefineXML(domXML); err != nil {
- return false, errwrap.Wrapf(err, "conn.DomainDefineXML failed")
- }
- log.Printf("%s[%s]: Domain defined", obj.Kind(), obj.GetName())
- }
- checkOK = false
- }
-
// check for valid state
switch obj.State {
case "running":
@@ -490,6 +532,283 @@ func (obj *VirtRes) CheckApply(apply bool) (bool, error) {
log.Printf("%s[%s]: Domain destroyed", obj.Kind(), obj.GetName())
}
+ return checkOK, nil
+}
+
+// attrCheckApply performs the CheckApply functions for CPU, Memory and others.
+// This shouldn't be called when the machine is absent; it won't be found!
+func (obj *VirtRes) attrCheckApply(apply bool, dom *libvirt.Domain) (bool, error) {
+ var checkOK = true
+ domInfo, err := dom.GetInfo()
+ if err != nil {
+ // we don't know if the state is ok
+ return false, errwrap.Wrapf(err, "domain.GetInfo failed")
+ }
+
+ // check (balloon) memory
+ // FIXME: check that we don't increase past max memory...
+ if domInfo.Memory != obj.Memory {
+ if !apply {
+ return false, nil
+ }
+ checkOK = false
+ if err := dom.SetMemory(obj.Memory); err != nil {
+ return false, errwrap.Wrapf(err, "domain.SetMemory failed")
+ }
+ log.Printf("%s[%s]: Memory changed to %d", obj.Kind(), obj.GetName(), obj.Memory)
+ }
+
+ // check cpus
+ if domInfo.NrVirtCpu != obj.CPUs {
+ if !apply {
+ return false, nil
+ }
+
+ // unused: DOMAIN_VCPU_CURRENT
+ switch domInfo.State {
+ case libvirt.DOMAIN_PAUSED:
+ // we can queue up the SetVcpus operation,
+ // which will be seen once vm is unpaused!
+ fallthrough
+ case libvirt.DOMAIN_RUNNING:
+ // cpu hot*un*plug introduced in 2.2.0
+ // 2 * 1000000 + 2 * 1000 + 0 = 2002000
+ if obj.HotCPUs && obj.version < 2002000 && domInfo.NrVirtCpu > obj.CPUs {
+ return false, fmt.Errorf("libvirt 2.2.0 or greater is required to hotunplug cpus")
+ }
+ // pkrempa says HOTPLUGGABLE is implied when doing LIVE
+ // on running machine, but we add it anyways in case we
+ // race and the machine is in shutoff state. We need to
+ // specify HOTPLUGGABLE if we add while machine is off!
+ // We particularly need to add HOTPLUGGABLE with CONFIG
+ flags := libvirt.DOMAIN_VCPU_LIVE
+ if !obj.Transient {
+ flags |= libvirt.DOMAIN_VCPU_CONFIG
+ // hotpluggable flag introduced in 2.4.0
+ // 2 * 1000000 + 4 * 1000 + 0 = 2004000
+ if obj.version >= 2004000 {
+ flags |= libvirt.DOMAIN_VCPU_HOTPLUGGABLE
+ }
+ }
+ if err := dom.SetVcpusFlags(obj.CPUs, flags); err != nil {
+ return false, errwrap.Wrapf(err, "domain.SetVcpus failed")
+ }
+ checkOK = false
+ log.Printf("%s[%s]: CPUs (hot) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs)
+
+ case libvirt.DOMAIN_SHUTOFF, libvirt.DOMAIN_SHUTDOWN:
+ if !obj.Transient {
+ flags := libvirt.DOMAIN_VCPU_CONFIG
+ if obj.version >= 2004000 {
+ flags |= libvirt.DOMAIN_VCPU_HOTPLUGGABLE
+ }
+ if err := dom.SetVcpusFlags(obj.CPUs, flags); err != nil {
+ return false, errwrap.Wrapf(err, "domain.SetVcpus failed")
+ }
+ checkOK = false
+ log.Printf("%s[%s]: CPUs (cold) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs)
+ }
+
+ default:
+ // FIXME: is this accurate?
+ return false, fmt.Errorf("can't modify cpus when in %v", domInfo.State)
+ }
+ }
+
+ // modify the online aspect of the cpus with qemu-guest-agent
+ if obj.HotCPUs && obj.guestAgentConnected && domInfo.State != libvirt.DOMAIN_PAUSED {
+
+ // if hotplugging a cpu without the guest agent, you might need:
+ // manually to: echo 1 > /sys/devices/system/cpu/cpu1/online OR
+ // udev (untested) in: /etc/udev/rules.d/99-hotplugCPU.rules
+ // SUBSYSTEM=="cpu",ACTION=="add",RUN+="/bin/sh -c '[ ! -e /sys$devpath/online ] || echo 1 > /sys$devpath/online'"
+
+ // how many online cpus are there?
+ i, err := dom.GetVcpusFlags(libvirt.DOMAIN_VCPU_GUEST)
+ if err != nil {
+ return false, errwrap.Wrapf(err, "domain.GetVcpus failed from qemu-guest-agent")
+ }
+ onlineCPUs := uint(i)
+ if onlineCPUs != obj.CPUs {
+ if !apply {
+ return false, nil
+ }
+ if err := dom.SetVcpusFlags(obj.CPUs, libvirt.DOMAIN_VCPU_GUEST); err != nil {
+ return false, errwrap.Wrapf(err, "domain.SetVcpus failed")
+ }
+ checkOK = false
+ log.Printf("%s[%s]: CPUs (guest) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs)
+ }
+ }
+
+ return checkOK, nil
+}
+
+// domainShutdownSync powers off a domain in a manner which will allow hardware
+// to be changed while off. This requires the process to exit so that when it's
+// called again, qemu can start up fresh as if we cold swapped in new hardware!
+// This method is particularly special because it waits for shutdown to finish.
+func (obj *VirtRes) domainShutdownSync(apply bool, dom *libvirt.Domain) (bool, error) {
+ // we need to wait for shutdown to be finished before we can restart it
+ once := true
+ timeout := time.After(time.Duration(MaxShutdownDelayTimeout) * time.Second)
+
+ // wait until shutdown completion...
+ for {
+ domInfo, err := dom.GetInfo()
+ if err != nil {
+ // we don't know if the state is ok
+ return false, errwrap.Wrapf(err, "domain.GetInfo failed")
+ }
+ if domInfo.State == libvirt.DOMAIN_SHUTOFF || domInfo.State == libvirt.DOMAIN_SHUTDOWN {
+ log.Printf("%s[%s]: Shutdown", obj.Kind(), obj.GetName())
+ break
+ }
+
+ if once {
+ if !apply {
+ return false, nil
+ }
+ obj.processExitWatch = true
+ obj.processExitChan = make(chan struct{})
+ // if machine shuts down before we call this, we error;
+ // this isn't ideal, but it happened due to user error!
+ log.Printf("%s[%s]: Running shutdown", obj.Kind(), obj.GetName())
+ if err := dom.Shutdown(); err != nil {
+ // FIXME: if machine is already shutdown completely, return early
+ return false, errwrap.Wrapf(err, "domain.Shutdown failed")
+ }
+ once = false // we did some work!
+ }
+
+ select {
+ case <-obj.processExitChan: // should get a close signal...
+ // pass
+ case <-time.After(time.Duration(ShortPollInterval) * time.Second):
+ // poll until timeout in case no event ever arrives
+ // this happens when using Meta().Poll for example!
+
+ // FIXME: some domains can reboot when asked to shutdown
+ // via the `on_poweroff` xml setting. in this case, we
+ // might only exit from here via timeout... avoid this!
+ // https://libvirt.org/formatdomain.html#elementsEvents
+ continue
+ case <-timeout:
+ return false, fmt.Errorf("%s[%s]: didn't shutdown after %d seconds", obj.Kind(), obj.GetName(), MaxShutdownDelayTimeout)
+ }
+ }
+
+ return once, nil
+}
+
+// CheckApply checks the resource state and applies the resource if the bool
+// input is true. It returns error info and if the state check passed or not.
+func (obj *VirtRes) CheckApply(apply bool) (bool, error) {
+ // if we do the restart, we must flip the flag back to false as evidence
+ var restart bool // do we need to do a restart?
+ if obj.RestartOnRefresh && obj.Refresh() { // a refresh is a restart ask
+ restart = true
+ }
+
+ // we need to restart in all situations except ignore. the "error" case
+ // means that if a restart is actually needed, we should return an error
+ if obj.restartScheduled && obj.RestartOnDiverge != "ignore" { // "ignore", "ifneeded", "error"
+ restart = true
+ }
+ if !apply {
+ restart = false
+ }
+
+ var checkOK = true
+
+ dom, err := obj.conn.LookupDomainByName(obj.GetName())
+ if err == nil {
+ // pass
+ } else if isNotFound(err) {
+ // domain not found
+ if obj.absent {
+ // we can ignore the restart var since we're not running
+ return true, nil
+ }
+
+ if !apply {
+ return false, nil
+ }
+
+ var c = true
+ dom, c, err = obj.domainCreate() // create the domain
+ if err != nil {
+ return false, errwrap.Wrapf(err, "domainCreate failed")
+ } else if !c {
+ checkOK = false
+ }
+
+ } else {
+ return false, errwrap.Wrapf(err, "LookupDomainByName failed")
+ }
+ defer dom.Free() // the Free() for two possible domain objects above
+ // domain now exists
+
+ isPersistent, err := dom.IsPersistent()
+ if err != nil {
+ // we don't know if the state is ok
+ return false, errwrap.Wrapf(err, "domain.IsPersistent failed")
+ }
+ // check for persistence
+ if isPersistent == obj.Transient { // if they're different!
+ if !apply {
+ return false, nil
+ }
+ if isPersistent {
+ if err := dom.Undefine(); err != nil {
+ return false, errwrap.Wrapf(err, "domain.Undefine failed")
+ }
+ log.Printf("%s[%s]: Domain undefined", obj.Kind(), obj.GetName())
+ } else {
+ domXML, err := dom.GetXMLDesc(libvirt.DOMAIN_XML_INACTIVE)
+ if err != nil {
+ return false, errwrap.Wrapf(err, "domain.GetXMLDesc failed")
+ }
+ if _, err = obj.conn.DomainDefineXML(domXML); err != nil {
+ return false, errwrap.Wrapf(err, "conn.DomainDefineXML failed")
+ }
+ log.Printf("%s[%s]: Domain defined", obj.Kind(), obj.GetName())
+ }
+ checkOK = false
+ }
+
+ // shutdown here and let the stateCheckApply fix things up...
+ // TODO: i think this is the most straight forward process...
+ if !obj.absent && restart {
+ if c, err := obj.domainShutdownSync(apply, dom); err != nil {
+ return false, errwrap.Wrapf(err, "domainShutdownSync failed")
+ } else if !c {
+ checkOK = false
+ restart = false // clear the restart requirement...
+ }
+ }
+
+ // FIXME: is doing this early check (therefore twice total) a good idea?
+ // run additional pre-emptive attr change checks here for hotplug stuff!
+ if !obj.absent {
+ if c, err := obj.attrCheckApply(apply, dom); err != nil {
+ return false, errwrap.Wrapf(err, "early attrCheckApply failed")
+ } else if !c {
+ checkOK = false
+ }
+ }
+ // TODO: do we need to run again below after we've booted up the domain?
+
+ // apply correct machine state, eg: startup/shutoff/pause as needed
+ if c, err := obj.stateCheckApply(apply, dom); err != nil {
+ return false, errwrap.Wrapf(err, "stateCheckApply failed")
+ } else if !c {
+ checkOK = false
+ }
+
+ // FIXME: should we wait to ensure machine is booted before continuing?
+ // it may be useful to wait for guest agent to hotplug some ram or cpu!
+
if !apply {
return false, nil
}
@@ -497,17 +816,22 @@ func (obj *VirtRes) CheckApply(apply bool) (bool, error) {
// mem & cpu checks...
if !obj.absent {
- if c, err := obj.attrCheckApply(apply); err != nil {
+ if c, err := obj.attrCheckApply(apply, dom); err != nil {
return false, errwrap.Wrapf(err, "attrCheckApply failed")
} else if !c {
checkOK = false
}
}
+ // we had to do a restart, we didn't, and we should error if it was needed
+ if obj.restartScheduled && restart == true && obj.RestartOnDiverge == "error" {
+ return false, fmt.Errorf("%s[%s]: needed restart but didn't! (RestartOnDiverge: %v)", obj.Kind(), obj.GetName(), obj.RestartOnDiverge)
+ }
+
return checkOK, nil // w00t
}
-// Return the correct domain type based on the uri
+// getDomainType returns the correct domain type based on the uri.
func (obj VirtRes) getDomainType() string {
switch obj.uriScheme {
case lxcURI:
@@ -517,7 +841,7 @@ func (obj VirtRes) getDomainType() string {
}
}
-// Return the correct os type based on the uri
+// getOSType returns the correct os type based on the uri.
func (obj VirtRes) getOSType() string {
switch obj.uriScheme {
case lxcURI:
@@ -536,13 +860,30 @@ func (obj VirtRes) getOSInit() string {
}
}
+// getDomainXML returns the representative XML for a domain struct.
+// FIXME: replace this with the libvirt-go-xml package instead!
func (obj *VirtRes) getDomainXML() string {
var b string
b += obj.getDomainType() // start domain
b += fmt.Sprintf("%s", obj.GetName())
b += fmt.Sprintf("%d", obj.Memory)
- b += fmt.Sprintf("%d", obj.CPUs)
+
+ if obj.HotCPUs {
+ b += fmt.Sprintf("%d", obj.CPUs, obj.MaxCPUs)
+ b += fmt.Sprintf("")
+ b += fmt.Sprintf("") // zeroth cpu can't change
+ for i := uint(1); i < obj.MaxCPUs; i++ { // skip first entry
+ enabled := "no"
+ if i < obj.CPUs {
+ enabled = "yes"
+ }
+ b += fmt.Sprintf("", i, enabled)
+ }
+ b += fmt.Sprintf("")
+ } else {
+ b += fmt.Sprintf("%d", obj.CPUs)
+ }
b += ""
b += obj.getOSType()
@@ -554,6 +895,13 @@ func (obj *VirtRes) getDomainXML() string {
}
b += fmt.Sprintf("")
+ if obj.HotCPUs {
+ // acpi is needed for cpu hotplug support
+ b += fmt.Sprintf("")
+ b += fmt.Sprintf("")
+ b += fmt.Sprintf("")
+ }
+
b += fmt.Sprintf("") // start devices
if obj.Disk != nil {
@@ -580,6 +928,16 @@ func (obj *VirtRes) getDomainXML() string {
}
}
+ // qemu guest agent is not *required* for hotplugging cpus but
+ // it helps because it can ask the host to make them online...
+ if obj.HotCPUs {
+ // enable qemu guest agent (on the host side)
+ b += fmt.Sprintf("")
+ b += fmt.Sprintf("")
+ b += fmt.Sprintf("")
+ b += fmt.Sprintf("")
+ }
+
b += ""
b += ""
b += "" // end devices
@@ -718,6 +1076,12 @@ func (obj *VirtRes) Compare(res Res) bool {
if obj.CPUs != res.CPUs {
return false
}
+ // we can't change this property while machine is running!
+ // we do need to return false, so that a new struct gets built,
+ // which will cause at least one Init() & CheckApply() to run.
+ if obj.MaxCPUs != res.MaxCPUs {
+ return false
+ }
// TODO: can we skip the compare of certain properties such as
// Memory because this object (but with different memory) can be
// *converted* into the new version that has more/less memory?
@@ -748,10 +1112,6 @@ func (obj *VirtRes) Compare(res Res) bool {
return true
}
-// CollectPattern applies the pattern for collection resources.
-func (obj *VirtRes) CollectPattern(string) {
-}
-
// UnmarshalYAML is the custom unmarshal handler for this struct.
// It is primarily useful for setting the defaults.
func (obj *VirtRes) UnmarshalYAML(unmarshal func(interface{}) error) error {
@@ -781,6 +1141,18 @@ func randMAC() string {
fmt.Sprintf(":%x", rand.Intn(255))
}
+// isNotFound tells us if this is a domain not found error.
+func isNotFound(err error) bool {
+ if err == nil {
+ return false
+ }
+ if virErr, ok := err.(libvirt.Error); ok && virErr.Code == libvirt.ERR_NO_DOMAIN {
+ // domain not found
+ return true
+ }
+ return false // some other error
+}
+
// expandHome does a simple expansion of the tilde into your $HOME value.
func expandHome(p string) (string, error) {
// TODO: this doesn't match strings of the form: ~james/...
diff --git a/test/test-bashfmt.sh b/test/test-bashfmt.sh
index 6f378e4c..39944f7b 100755
--- a/test/test-bashfmt.sh
+++ b/test/test-bashfmt.sh
@@ -14,7 +14,7 @@ ROOT=$(dirname "${BASH_SOURCE}")/..
cd "${ROOT}"
find_files() {
- git ls-files | grep -e '\.sh$' -e '\.bash$'
+ git ls-files | grep -e '\.sh$' -e '\.bash$' | grep -v 'misc/delta-cpu.sh'
}
bad_files=$(