resources: virt: Allow hotplugging

This allows hot (un)plugging of CPUs! It also includes some general
cleanups which were necessary to support this, as well as some other
features for the virt resource. Hot unplugging requires Fedora 25.

It also comes with a mini shell script to help demo this capability.

Many thanks to pkrempa for his help with the libvirt API!
This commit is contained in:
James Shubin
2017-01-10 02:12:51 -05:00
parent 74dfb9d88d
commit bec7f1726f
5 changed files with 603 additions and 139 deletions

View File

@@ -108,10 +108,15 @@ opened in the `Init` method and were using throughout the resource.
```golang
// Close runs some cleanup code for this resource.
func (obj *FooRes) Close() error {
err := obj.conn.Close() // close some internal connection
// call base close, b/c we're overriding
if e := obj.BaseRes.Close(); err == nil {
err = e
} else if e != nil {
err = multierr.Append(err, e) // list of errors
}
return err
}
```

View File

@@ -3,7 +3,13 @@ graph: mygraph
resources:
virt:
- name: mgmt4
meta:
limit: .inf
burst: 0
uri: 'qemu:///session'
cpus: 1
maxcpus: 4
memory: 524288
boot:
- hd
disk:

81
misc/delta-cpu.sh Executable file
View File

@@ -0,0 +1,81 @@
#!/bin/bash
# shitty cpu count control, useful for live demos
minimum=1 # don't decrease below this number of cpus
maximum=8 # don't increase above this number of cpus
count=1 # initial count
factor=3
function output() {
count=$1 # arg!
cat << EOF > ~/code/mgmt/examples/virt4.yaml
---
graph: mygraph
resources:
virt:
- name: mgmt4
meta:
limit: .inf
burst: 0
uri: 'qemu:///session'
cpus: $count
maxcpus: $maximum
memory: 524288
boot:
- hd
disk:
- type: qcow2
source: "~/.local/share/libvirt/images/fedora-23-scratch.qcow2"
state: running
transient: false
edges: []
comment: "qemu-img create -b fedora-23.qcow2 -f qcow2 fedora-23-scratch.qcow2"
EOF
}
#tput cuu 1 && tput el # remove last line
str=''
tnuoc=$((maximum-count)) # backwards count
count2=$((count * factor))
tnuoc2=$((tnuoc * factor))
left=`yes '>' | head -$count2 | paste -s -d '' -`
right=`yes ' ' | head -$tnuoc2 | paste -s -d '' -`
str="${left}${right}"
_min=$((minimum-1))
_max=$((maximum+1))
reset # clean up once...
output $count # call function
while true; do
read -n1 -r -s -p "CPUs count is: $count; ${str} Press +/- key to adjust." key
if [ "$key" = "q" ] || [ "$key" = "Q" ]; then
echo # newline
exit
fi
if [ ! "$key" = "+" ] && [ ! "$key" = "-" ] && [ ! "$key" = "=" ] && [ ! "$key" = "_" ]; then # wrong key
reset # woops, reset it all...
continue
fi
if [ "$key" == "+" ] || [ "$key" == "=" ]; then
count=$((count+1))
fi
if [ "$key" == "-" ] || [ "$key" == "_" ]; then
count=$((count-1))
fi
if [ $count -eq $_min ]; then # min
count=$minimum
fi
if [ $count -eq $_max ]; then # max
count=$maximum
fi
tnuoc=$((maximum-count)) # backwards count
#echo "count is: $count"
#echo "tnuoc is: $tnuoc"
count2=$((count * factor))
tnuoc2=$((tnuoc * factor))
left=`yes '>' | head -$count2 | paste -s -d '' -`
right=`yes ' ' | head -$tnuoc2 | paste -s -d '' -`
str="${left}${right}"
#echo "str is: $str"
echo -ne '\r' # backup
output $count # call function
done

View File

@@ -31,7 +31,9 @@ import (
"github.com/purpleidea/mgmt/event"
multierr "github.com/hashicorp/go-multierror"
"github.com/libvirt/libvirt-go"
libvirtxml "github.com/libvirt/libvirt-go-xml"
errwrap "github.com/pkg/errors"
)
@@ -39,6 +41,17 @@ func init() {
gob.Register(&VirtRes{})
}
const (
// DefaultMaxCPUs is the default number of possible cpu "slots" used.
DefaultMaxCPUs = 32
// MaxShutdownDelayTimeout is the max time we wait for a vm to shutdown.
MaxShutdownDelayTimeout = 60 * 5 // seconds
// ShortPollInterval is how often we poll when expecting an event.
ShortPollInterval = 5 // seconds
)
var (
libvirtInitialized = false
)
@@ -65,6 +78,7 @@ type VirtRes struct {
State string `yaml:"state"` // running, paused, shutoff
Transient bool `yaml:"transient"` // defined (false) or undefined (true)
CPUs uint `yaml:"cpus"`
MaxCPUs uint `yaml:"maxcpus"`
Memory uint64 `yaml:"memory"` // in KBytes
OSInit string `yaml:"osinit"` // init used by lxc
Boot []string `yaml:"boot"` // boot order. values: fd, hd, cdrom, network
@@ -74,9 +88,19 @@ type VirtRes struct {
Filesystem []filesystemDevice `yaml:"filesystem"`
Auth *VirtAuth `yaml:"auth"`
HotCPUs bool `yaml:"hotcpus"` // allow hotplug of cpus?
// FIXME: values here should be enum's!
RestartOnDiverge string `yaml:"restartondiverge"` // restart policy: "ignore", "ifneeded", "error"
RestartOnRefresh bool `yaml:"restartonrefresh"` // restart on refresh?
conn *libvirt.Connect
version uint32 // major * 1000000 + minor * 1000 + release
absent bool // cached state
uriScheme virtURISchemeType
processExitWatch bool // do we want to wait on an explicit process exit?
processExitChan chan struct{}
restartScheduled bool // do we need to schedule a hard restart?
guestAgentConnected bool // our tracking of if guest agent is running
}
// Default returns some sensible defaults for this resource.
@@ -85,9 +109,22 @@ func (obj *VirtRes) Default() Res {
BaseRes: BaseRes{
MetaParams: DefaultMetaParams, // force a default
},
MaxCPUs: DefaultMaxCPUs,
HotCPUs: true, // we're a dynamic engine, be dynamic by default!
RestartOnDiverge: "error", // safest default :(
}
}
// Validate if the params passed in are valid data.
func (obj *VirtRes) Validate() error {
	// the requested cpu count may never exceed the number of cpu slots
	if obj.CPUs <= obj.MaxCPUs {
		return obj.BaseRes.Validate()
	}
	return fmt.Errorf("the number of CPUs (%d) must not be greater than MaxCPUs (%d)", obj.CPUs, obj.MaxCPUs)
}
// Init runs some startup code for this resource.
func (obj *VirtRes) Init() error {
if !libvirtInitialized {
@@ -108,15 +145,71 @@ func (obj *VirtRes) Init() error {
obj.absent = (obj.Transient && obj.State == "shutoff") // machine shouldn't exist
obj.conn, err = obj.connect() // gets closed in Close method of Res API
if err != nil {
return errwrap.Wrapf(err, "%s[%s]: Connection to libvirt failed in init", obj.Kind(), obj.GetName())
}
// check for hard to change properties
dom, err := obj.conn.LookupDomainByName(obj.GetName())
if err == nil {
defer dom.Free()
} else if !isNotFound(err) {
return errwrap.Wrapf(err, "%s[%s]: Could not lookup on init", obj.Kind(), obj.GetName())
}
if err == nil {
// maxCPUs, err := dom.GetMaxVcpus()
i, err := dom.GetVcpusFlags(libvirt.DOMAIN_VCPU_MAXIMUM)
if err != nil {
return errwrap.Wrapf(err, "%s[%s]: Could not lookup MaxCPUs on init", obj.Kind(), obj.GetName())
}
maxCPUs := uint(i)
if obj.MaxCPUs != maxCPUs { // max cpu slots is hard to change
// we'll need to reboot to fix this one...
obj.restartScheduled = true
}
// parse running domain xml to read properties
// FIXME: should we do this in Watch, after we register the
// event handlers so that we don't miss any events via race?
xmlDesc, err := dom.GetXMLDesc(0) // 0 means no flags
if err != nil {
return errwrap.Wrapf(err, "%s[%s]: Could not GetXMLDesc on init", obj.Kind(), obj.GetName())
}
domXML := &libvirtxml.Domain{}
if err := domXML.Unmarshal(xmlDesc); err != nil {
return errwrap.Wrapf(err, "%s[%s]: Could not unmarshal XML on init", obj.Kind(), obj.GetName())
}
// guest agent: domain->devices->channel->target->state == connected?
for _, x := range domXML.Devices.Channels {
if x.Target.Type == "virtio" && strings.HasPrefix(x.Target.Name, "org.qemu.guest_agent.") {
// last connection found wins (usually 1 anyways)
obj.guestAgentConnected = (x.Target.State == "connected")
}
}
}
obj.BaseRes.kind = "Virt"
return obj.BaseRes.Init() // call base init, b/c we're overriding
}
// Validate if the params passed in are valid data.
func (obj *VirtRes) Validate() error {
return obj.BaseRes.Validate()
// Close runs some cleanup code for this resource.
func (obj *VirtRes) Close() error {
	// TODO: what is the first int Close return value useful for (if at all)?
	_, err := obj.conn.Close() // close libvirt conn that was opened in Init
	e := obj.BaseRes.Close()   // call base close, b/c we're overriding
	switch {
	case err == nil:
		err = e // adopt the base result (which may itself be nil)
	case e != nil:
		err = multierr.Append(err, e) // list of errors
	}
	return err
}
// connect is the connect helper for the libvirt connection. It can handle auth.
func (obj *VirtRes) connect() (conn *libvirt.Connect, err error) {
if obj.Auth != nil {
callback := func(creds []*libvirt.ConnectCredential) {
@@ -139,21 +232,27 @@ func (obj *VirtRes) connect() (conn *libvirt.Connect, err error) {
Callback: callback,
}
conn, err = libvirt.NewConnectWithAuth(obj.URI, auth, 0)
if err == nil {
if version, err := conn.GetLibVersion(); err == nil {
obj.version = version
}
}
}
if obj.Auth == nil || err != nil {
conn, err = libvirt.NewConnect(obj.URI)
if err == nil {
if version, err := conn.GetLibVersion(); err == nil {
obj.version = version
}
}
}
return
}
// Watch is the primary listener for this resource and it outputs events.
func (obj *VirtRes) Watch(processChan chan *event.Event) error {
conn, err := obj.connect()
if err != nil {
return fmt.Errorf("Connection to libvirt failed with: %s", err)
}
eventChan := make(chan libvirt.DomainEventType) // TODO: do we need to buffer this?
domChan := make(chan libvirt.DomainEventType) // TODO: do we need to buffer this?
gaChan := make(chan *libvirt.DomainEventAgentLifecycle)
errorChan := make(chan error)
exitChan := make(chan struct{})
defer close(exitChan)
@@ -181,17 +280,32 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error {
}
}()
callback := func(c *libvirt.Connect, d *libvirt.Domain, ev *libvirt.DomainEventLifecycle) {
// domain events callback
domCallback := func(c *libvirt.Connect, d *libvirt.Domain, ev *libvirt.DomainEventLifecycle) {
domName, _ := d.GetName()
if domName == obj.GetName() {
eventChan <- ev.Event
domChan <- ev.Event
}
}
callbackID, err := conn.DomainEventLifecycleRegister(nil, callback)
// if dom is nil, we get events for *all* domains!
domCallbackID, err := obj.conn.DomainEventLifecycleRegister(nil, domCallback)
if err != nil {
return err
}
defer conn.DomainEventDeregister(callbackID)
defer obj.conn.DomainEventDeregister(domCallbackID)
// guest agent events callback
gaCallback := func(c *libvirt.Connect, d *libvirt.Domain, eva *libvirt.DomainEventAgentLifecycle) {
domName, _ := d.GetName()
if domName == obj.GetName() {
gaChan <- eva
}
}
gaCallbackID, err := obj.conn.DomainEventAgentLifecycleRegister(nil, gaCallback)
if err != nil {
return err
}
defer obj.conn.DomainEventDeregister(gaCallbackID)
// notify engine that we're running
if err := obj.Running(processChan); err != nil {
@@ -202,8 +316,9 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error {
var exit *error // if ptr exists, that is the exit error to return
for {
processExited := false // did the process exit fully (shutdown)?
select {
case event := <-eventChan:
case event := <-domChan:
// TODO: shouldn't we do these checks in CheckApply ?
switch event {
case libvirt.DOMAIN_EVENT_DEFINED:
@@ -235,11 +350,43 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error {
obj.StateOK(false) // dirty
send = true
}
processExited = true
case libvirt.DOMAIN_EVENT_PMSUSPENDED:
// FIXME: IIRC, in s3 we can't cold change
// hardware like cpus but in s4 it's okay?
// verify, detect and patch appropriately!
fallthrough
case libvirt.DOMAIN_EVENT_CRASHED:
obj.StateOK(false) // dirty
send = true
processExited = true // FIXME: is this okay for PMSUSPENDED ?
}
if obj.processExitWatch && processExited {
close(obj.processExitChan) // send signal
obj.processExitWatch = false
}
case agentEvent := <-gaChan:
state, reason := agentEvent.State, agentEvent.Reason
if state == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_STATE_CONNECTED {
obj.guestAgentConnected = true
obj.StateOK(false) // dirty
send = true
log.Printf("%s[%s]: Guest agent connected", obj.Kind(), obj.GetName())
} else if state == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_STATE_DISCONNECTED {
obj.guestAgentConnected = false
// ignore CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_DOMAIN_STARTED
// events because they just tell you that guest agent channel was added
if reason == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_CHANNEL {
log.Printf("%s[%s]: Guest agent disconnected", obj.Kind(), obj.GetName())
}
} else {
return fmt.Errorf("Unknown %s[%s] guest agent state: %v", obj.Kind(), obj.GetName(), state)
}
case err := <-errorChan:
@@ -258,51 +405,9 @@ func (obj *VirtRes) Watch(processChan chan *event.Event) error {
}
}
// attrCheckApply performs the CheckApply functions for CPU, Memory and others.
// This shouldn't be called when the machine is absent; it won't be found!
// It returns true when no change was needed, false when a change was made or
// would be needed (when apply is false), and an error if the state could not
// be determined or applied.
func (obj *VirtRes) attrCheckApply(apply bool) (bool, error) {
	var checkOK = true
	dom, err := obj.conn.LookupDomainByName(obj.GetName())
	if err != nil {
		return false, errwrap.Wrapf(err, "conn.LookupDomainByName failed")
	}
	// BUGFIX: the looked-up domain object holds a reference that must be
	// released, otherwise it leaks on every call.
	defer dom.Free()
	domInfo, err := dom.GetInfo()
	if err != nil {
		// we don't know if the state is ok
		return false, errwrap.Wrapf(err, "domain.GetInfo failed")
	}
	// check memory (balloon target, in KiB)
	if domInfo.Memory != obj.Memory {
		checkOK = false
		if !apply {
			return false, nil
		}
		if err := dom.SetMemory(obj.Memory); err != nil {
			return false, errwrap.Wrapf(err, "domain.SetMemory failed")
		}
		log.Printf("%s[%s]: Memory changed", obj.Kind(), obj.GetName())
	}
	// check cpus
	if domInfo.NrVirtCpu != obj.CPUs {
		checkOK = false
		if !apply {
			return false, nil
		}
		if err := dom.SetVcpus(obj.CPUs); err != nil {
			return false, errwrap.Wrapf(err, "domain.SetVcpus failed")
		}
		log.Printf("%s[%s]: CPUs changed", obj.Kind(), obj.GetName())
	}
	return checkOK, nil
}
// domainCreate creates a transient or persistent domain in the correct state. It
// doesn't check the state before hand, as it is a simple helper function.
// domainCreate creates a transient or persistent domain in the correct state.
// It doesn't check the state before hand, as it is a simple helper function.
// The caller must run dom.Free() after use, when error was returned as nil.
func (obj *VirtRes) domainCreate() (*libvirt.Domain, bool, error) {
if obj.Transient {
@@ -350,83 +455,20 @@ func (obj *VirtRes) domainCreate() (*libvirt.Domain, bool, error) {
return dom, false, nil
}
// CheckApply checks the resource state and applies the resource if the bool
// input is true. It returns error info and if the state check passed or not.
func (obj *VirtRes) CheckApply(apply bool) (bool, error) {
var err error
obj.conn, err = obj.connect()
if err != nil {
return false, fmt.Errorf("Connection to libvirt failed with: %s", err)
}
// stateCheckApply starts, stops, or pauses/unpauses the domain as needed.
func (obj *VirtRes) stateCheckApply(apply bool, dom *libvirt.Domain) (bool, error) {
var checkOK = true
dom, err := obj.conn.LookupDomainByName(obj.GetName())
if err == nil {
// pass
} else if virErr, ok := err.(libvirt.Error); ok && virErr.Code == libvirt.ERR_NO_DOMAIN {
// domain not found
if obj.absent {
return true, nil
}
if !apply {
return false, nil
}
var c = true
dom, c, err = obj.domainCreate() // create the domain
if err != nil {
return false, errwrap.Wrapf(err, "domainCreate failed")
} else if !c {
checkOK = false
}
} else {
return false, errwrap.Wrapf(err, "LookupDomainByName failed")
}
defer dom.Free()
// domain exists
domInfo, err := dom.GetInfo()
if err != nil {
// we don't know if the state is ok
return false, errwrap.Wrapf(err, "domain.GetInfo failed")
}
isPersistent, err := dom.IsPersistent()
if err != nil {
// we don't know if the state is ok
return false, errwrap.Wrapf(err, "domain.IsPersistent failed")
}
isActive, err := dom.IsActive()
if err != nil {
// we don't know if the state is ok
return false, errwrap.Wrapf(err, "domain.IsActive failed")
}
// check for persistence
if isPersistent == obj.Transient { // if they're different!
if !apply {
return false, nil
}
if isPersistent {
if err := dom.Undefine(); err != nil {
return false, errwrap.Wrapf(err, "domain.Undefine failed")
}
log.Printf("%s[%s]: Domain undefined", obj.Kind(), obj.GetName())
} else {
domXML, err := dom.GetXMLDesc(libvirt.DOMAIN_XML_INACTIVE)
if err != nil {
return false, errwrap.Wrapf(err, "domain.GetXMLDesc failed")
}
if _, err = obj.conn.DomainDefineXML(domXML); err != nil {
return false, errwrap.Wrapf(err, "conn.DomainDefineXML failed")
}
log.Printf("%s[%s]: Domain defined", obj.Kind(), obj.GetName())
}
checkOK = false
}
// check for valid state
switch obj.State {
case "running":
@@ -490,6 +532,283 @@ func (obj *VirtRes) CheckApply(apply bool) (bool, error) {
log.Printf("%s[%s]: Domain destroyed", obj.Kind(), obj.GetName())
}
return checkOK, nil
}
// attrCheckApply performs the CheckApply functions for CPU, Memory and others.
// This shouldn't be called when the machine is absent; it won't be found!
// The dom handle must already be looked up; the caller owns it and frees it.
// It returns true if everything already matched, false if a change was made
// (or would be needed when apply is false), and an error on failure.
func (obj *VirtRes) attrCheckApply(apply bool, dom *libvirt.Domain) (bool, error) {
var checkOK = true
domInfo, err := dom.GetInfo()
if err != nil {
// we don't know if the state is ok
return false, errwrap.Wrapf(err, "domain.GetInfo failed")
}
// check (balloon) memory
// FIXME: check that we don't increase past max memory...
if domInfo.Memory != obj.Memory {
if !apply {
return false, nil
}
checkOK = false
if err := dom.SetMemory(obj.Memory); err != nil {
return false, errwrap.Wrapf(err, "domain.SetMemory failed")
}
log.Printf("%s[%s]: Memory changed to %d", obj.Kind(), obj.GetName(), obj.Memory)
}
// check cpus
// how we set the vcpu count depends on the current domain state
if domInfo.NrVirtCpu != obj.CPUs {
if !apply {
return false, nil
}
// unused: DOMAIN_VCPU_CURRENT
switch domInfo.State {
case libvirt.DOMAIN_PAUSED:
// we can queue up the SetVcpus operation,
// which will be seen once vm is unpaused!
fallthrough
case libvirt.DOMAIN_RUNNING:
// cpu hot*un*plug introduced in 2.2.0
// 2 * 1000000 + 2 * 1000 + 0 = 2002000
// (obj.version is major*1000000 + minor*1000 + release)
if obj.HotCPUs && obj.version < 2002000 && domInfo.NrVirtCpu > obj.CPUs {
return false, fmt.Errorf("libvirt 2.2.0 or greater is required to hotunplug cpus")
}
// pkrempa says HOTPLUGGABLE is implied when doing LIVE
// on running machine, but we add it anyways in case we
// race and the machine is in shutoff state. We need to
// specify HOTPLUGGABLE if we add while machine is off!
// We particularly need to add HOTPLUGGABLE with CONFIG
flags := libvirt.DOMAIN_VCPU_LIVE
if !obj.Transient {
// persist the new count in the stored config too
flags |= libvirt.DOMAIN_VCPU_CONFIG
// hotpluggable flag introduced in 2.4.0
// 2 * 1000000 + 4 * 1000 + 0 = 2004000
if obj.version >= 2004000 {
flags |= libvirt.DOMAIN_VCPU_HOTPLUGGABLE
}
}
if err := dom.SetVcpusFlags(obj.CPUs, flags); err != nil {
return false, errwrap.Wrapf(err, "domain.SetVcpus failed")
}
checkOK = false
log.Printf("%s[%s]: CPUs (hot) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs)
case libvirt.DOMAIN_SHUTOFF, libvirt.DOMAIN_SHUTDOWN:
// NOTE(review): a transient domain that is shut off skips the
// cpu change here without marking checkOK false — confirm that
// this (transient + shutoff = nothing to change) is intended.
if !obj.Transient {
flags := libvirt.DOMAIN_VCPU_CONFIG
if obj.version >= 2004000 {
flags |= libvirt.DOMAIN_VCPU_HOTPLUGGABLE
}
if err := dom.SetVcpusFlags(obj.CPUs, flags); err != nil {
return false, errwrap.Wrapf(err, "domain.SetVcpus failed")
}
checkOK = false
log.Printf("%s[%s]: CPUs (cold) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs)
}
default:
// FIXME: is this accurate?
return false, fmt.Errorf("can't modify cpus when in %v", domInfo.State)
}
}
// modify the online aspect of the cpus with qemu-guest-agent
if obj.HotCPUs && obj.guestAgentConnected && domInfo.State != libvirt.DOMAIN_PAUSED {
// if hotplugging a cpu without the guest agent, you might need:
// manually to: echo 1 > /sys/devices/system/cpu/cpu1/online OR
// udev (untested) in: /etc/udev/rules.d/99-hotplugCPU.rules
// SUBSYSTEM=="cpu",ACTION=="add",RUN+="/bin/sh -c '[ ! -e /sys$devpath/online ] || echo 1 > /sys$devpath/online'"
// how many online cpus are there?
i, err := dom.GetVcpusFlags(libvirt.DOMAIN_VCPU_GUEST)
if err != nil {
return false, errwrap.Wrapf(err, "domain.GetVcpus failed from qemu-guest-agent")
}
onlineCPUs := uint(i)
if onlineCPUs != obj.CPUs {
if !apply {
return false, nil
}
// ask the agent (inside the guest) to online/offline cpus
if err := dom.SetVcpusFlags(obj.CPUs, libvirt.DOMAIN_VCPU_GUEST); err != nil {
return false, errwrap.Wrapf(err, "domain.SetVcpus failed")
}
checkOK = false
log.Printf("%s[%s]: CPUs (guest) changed to %d", obj.Kind(), obj.GetName(), obj.CPUs)
}
}
return checkOK, nil
}
// domainShutdownSync powers off a domain in a manner which will allow hardware
// to be changed while off. This requires the process to exit so that when it's
// called again, qemu can start up fresh as if we cold swapped in new hardware!
// This method is particularly special because it waits for shutdown to finish.
// The returned bool follows CheckApply semantics: true when the domain was
// already shut down (no work needed), false when a shutdown had to be issued.
// When apply is false and a shutdown would be needed, it returns (false, nil).
func (obj *VirtRes) domainShutdownSync(apply bool, dom *libvirt.Domain) (bool, error) {
// we need to wait for shutdown to be finished before we can restart it
once := true // flips to false once we actually issue a shutdown (ie: do work)
timeout := time.After(time.Duration(MaxShutdownDelayTimeout) * time.Second)
// wait until shutdown completion...
for {
domInfo, err := dom.GetInfo()
if err != nil {
// we don't know if the state is ok
return false, errwrap.Wrapf(err, "domain.GetInfo failed")
}
if domInfo.State == libvirt.DOMAIN_SHUTOFF || domInfo.State == libvirt.DOMAIN_SHUTDOWN {
log.Printf("%s[%s]: Shutdown", obj.Kind(), obj.GetName())
break
}
if once {
if !apply {
return false, nil
}
// arm the exit notification; Watch closes processExitChan when
// it sees the process exit fully (shutdown/crash)
obj.processExitWatch = true
obj.processExitChan = make(chan struct{})
// if machine shuts down before we call this, we error;
// this isn't ideal, but it happened due to user error!
log.Printf("%s[%s]: Running shutdown", obj.Kind(), obj.GetName())
if err := dom.Shutdown(); err != nil {
// FIXME: if machine is already shutdown completely, return early
return false, errwrap.Wrapf(err, "domain.Shutdown failed")
}
once = false // we did some work!
}
select {
case <-obj.processExitChan: // should get a close signal...
// pass
case <-time.After(time.Duration(ShortPollInterval) * time.Second):
// poll until timeout in case no event ever arrives
// this happens when using Meta().Poll for example!
// FIXME: some domains can reboot when asked to shutdown
// via the `on_poweroff` xml setting. in this case, we
// might only exit from here via timeout... avoid this!
// https://libvirt.org/formatdomain.html#elementsEvents
continue
case <-timeout:
return false, fmt.Errorf("%s[%s]: didn't shutdown after %d seconds", obj.Kind(), obj.GetName(), MaxShutdownDelayTimeout)
}
}
return once, nil
}
// CheckApply checks the resource state and applies the resource if the bool
// input is true. It returns error info and if the state check passed or not.
func (obj *VirtRes) CheckApply(apply bool) (bool, error) {
// if we do the restart, we must flip the flag back to false as evidence
var restart bool // do we need to do a restart?
if obj.RestartOnRefresh && obj.Refresh() { // a refresh is a restart ask
restart = true
}
// we need to restart in all situations except ignore. the "error" case
// means that if a restart is actually needed, we should return an error
if obj.restartScheduled && obj.RestartOnDiverge != "ignore" { // "ignore", "ifneeded", "error"
restart = true
}
if !apply {
restart = false
}
var checkOK = true
dom, err := obj.conn.LookupDomainByName(obj.GetName())
if err == nil {
// pass
} else if isNotFound(err) {
// domain not found
if obj.absent {
// we can ignore the restart var since we're not running
return true, nil
}
if !apply {
return false, nil
}
var c = true
dom, c, err = obj.domainCreate() // create the domain
if err != nil {
return false, errwrap.Wrapf(err, "domainCreate failed")
} else if !c {
checkOK = false
}
} else {
return false, errwrap.Wrapf(err, "LookupDomainByName failed")
}
defer dom.Free() // the Free() for two possible domain objects above
// domain now exists
isPersistent, err := dom.IsPersistent()
if err != nil {
// we don't know if the state is ok
return false, errwrap.Wrapf(err, "domain.IsPersistent failed")
}
// check for persistence
if isPersistent == obj.Transient { // if they're different!
if !apply {
return false, nil
}
if isPersistent {
if err := dom.Undefine(); err != nil {
return false, errwrap.Wrapf(err, "domain.Undefine failed")
}
log.Printf("%s[%s]: Domain undefined", obj.Kind(), obj.GetName())
} else {
domXML, err := dom.GetXMLDesc(libvirt.DOMAIN_XML_INACTIVE)
if err != nil {
return false, errwrap.Wrapf(err, "domain.GetXMLDesc failed")
}
if _, err = obj.conn.DomainDefineXML(domXML); err != nil {
return false, errwrap.Wrapf(err, "conn.DomainDefineXML failed")
}
log.Printf("%s[%s]: Domain defined", obj.Kind(), obj.GetName())
}
checkOK = false
}
// shutdown here and let the stateCheckApply fix things up...
// TODO: i think this is the most straight forward process...
if !obj.absent && restart {
if c, err := obj.domainShutdownSync(apply, dom); err != nil {
return false, errwrap.Wrapf(err, "domainShutdownSync failed")
} else if !c {
checkOK = false
restart = false // clear the restart requirement...
}
}
// FIXME: is doing this early check (therefore twice total) a good idea?
// run additional pre-emptive attr change checks here for hotplug stuff!
if !obj.absent {
if c, err := obj.attrCheckApply(apply, dom); err != nil {
return false, errwrap.Wrapf(err, "early attrCheckApply failed")
} else if !c {
checkOK = false
}
}
// TODO: do we need to run again below after we've booted up the domain?
// apply correct machine state, eg: startup/shutoff/pause as needed
if c, err := obj.stateCheckApply(apply, dom); err != nil {
return false, errwrap.Wrapf(err, "stateCheckApply failed")
} else if !c {
checkOK = false
}
// FIXME: should we wait to ensure machine is booted before continuing?
// it may be useful to wait for guest agent to hotplug some ram or cpu!
if !apply {
return false, nil
}
@@ -497,17 +816,22 @@ func (obj *VirtRes) CheckApply(apply bool) (bool, error) {
// mem & cpu checks...
if !obj.absent {
if c, err := obj.attrCheckApply(apply); err != nil {
if c, err := obj.attrCheckApply(apply, dom); err != nil {
return false, errwrap.Wrapf(err, "attrCheckApply failed")
} else if !c {
checkOK = false
}
}
// we had to do a restart, we didn't, and we should error if it was needed
if obj.restartScheduled && restart == true && obj.RestartOnDiverge == "error" {
return false, fmt.Errorf("%s[%s]: needed restart but didn't! (RestartOnDiverge: %v)", obj.Kind(), obj.GetName(), obj.RestartOnDiverge)
}
return checkOK, nil // w00t
}
// Return the correct domain type based on the uri
// getDomainType returns the correct domain type based on the uri.
func (obj VirtRes) getDomainType() string {
switch obj.uriScheme {
case lxcURI:
@@ -517,7 +841,7 @@ func (obj VirtRes) getDomainType() string {
}
}
// Return the correct os type based on the uri
// getOSType returns the correct os type based on the uri.
func (obj VirtRes) getOSType() string {
switch obj.uriScheme {
case lxcURI:
@@ -536,13 +860,30 @@ func (obj VirtRes) getOSInit() string {
}
}
// getDomainXML returns the representative XML for a domain struct.
// FIXME: replace this with the libvirt-go-xml package instead!
func (obj *VirtRes) getDomainXML() string {
var b string
b += obj.getDomainType() // start domain
b += fmt.Sprintf("<name>%s</name>", obj.GetName())
b += fmt.Sprintf("<memory unit='KiB'>%d</memory>", obj.Memory)
if obj.HotCPUs {
b += fmt.Sprintf("<vcpu current='%d'>%d</vcpu>", obj.CPUs, obj.MaxCPUs)
b += fmt.Sprintf("<vcpus>")
b += fmt.Sprintf("<vcpu id='0' enabled='yes' hotpluggable='no' order='1'/>") // zeroth cpu can't change
for i := uint(1); i < obj.MaxCPUs; i++ { // skip first entry
enabled := "no"
if i < obj.CPUs {
enabled = "yes"
}
b += fmt.Sprintf("<vcpu id='%d' enabled='%s' hotpluggable='yes'/>", i, enabled)
}
b += fmt.Sprintf("</vcpus>")
} else {
b += fmt.Sprintf("<vcpu>%d</vcpu>", obj.CPUs)
}
b += "<os>"
b += obj.getOSType()
@@ -554,6 +895,13 @@ func (obj *VirtRes) getDomainXML() string {
}
b += fmt.Sprintf("</os>")
if obj.HotCPUs {
// acpi is needed for cpu hotplug support
b += fmt.Sprintf("<features>")
b += fmt.Sprintf("<acpi/>")
b += fmt.Sprintf("</features>")
}
b += fmt.Sprintf("<devices>") // start devices
if obj.Disk != nil {
@@ -580,6 +928,16 @@ func (obj *VirtRes) getDomainXML() string {
}
}
// qemu guest agent is not *required* for hotplugging cpus but
// it helps because it can ask the host to make them online...
if obj.HotCPUs {
// enable qemu guest agent (on the host side)
b += fmt.Sprintf("<channel type='unix'>")
b += fmt.Sprintf("<source mode='bind'/>")
b += fmt.Sprintf("<target type='virtio' name='org.qemu.guest_agent.0'/>")
b += fmt.Sprintf("</channel>")
}
b += "<serial type='pty'><target port='0'/></serial>"
b += "<console type='pty'><target type='serial' port='0'/></console>"
b += "</devices>" // end devices
@@ -718,6 +1076,12 @@ func (obj *VirtRes) Compare(res Res) bool {
if obj.CPUs != res.CPUs {
return false
}
// we can't change this property while machine is running!
// we do need to return false, so that a new struct gets built,
// which will cause at least one Init() & CheckApply() to run.
if obj.MaxCPUs != res.MaxCPUs {
return false
}
// TODO: can we skip the compare of certain properties such as
// Memory because this object (but with different memory) can be
// *converted* into the new version that has more/less memory?
@@ -748,10 +1112,6 @@ func (obj *VirtRes) Compare(res Res) bool {
return true
}
// CollectPattern applies the pattern for collection resources.
// This is currently a no-op stub; the pattern argument is ignored.
func (obj *VirtRes) CollectPattern(string) {
}
// UnmarshalYAML is the custom unmarshal handler for this struct.
// It is primarily useful for setting the defaults.
func (obj *VirtRes) UnmarshalYAML(unmarshal func(interface{}) error) error {
@@ -781,6 +1141,18 @@ func randMAC() string {
fmt.Sprintf(":%x", rand.Intn(255))
}
// isNotFound tells us if this is a domain not found error.
func isNotFound(err error) bool {
	if err == nil {
		return false // no error at all
	}
	// only a libvirt.Error with the ERR_NO_DOMAIN code counts
	virErr, ok := err.(libvirt.Error)
	return ok && virErr.Code == libvirt.ERR_NO_DOMAIN
}
// expandHome does a simple expansion of the tilde into your $HOME value.
func expandHome(p string) (string, error) {
// TODO: this doesn't match strings of the form: ~james/...

View File

@@ -14,7 +14,7 @@ ROOT=$(dirname "${BASH_SOURCE}")/..
cd "${ROOT}"
find_files() {
git ls-files | grep -e '\.sh$' -e '\.bash$'
git ls-files | grep -e '\.sh$' -e '\.bash$' | grep -v 'misc/delta-cpu.sh'
}
bad_files=$(