prometheus: Implement mgmt_checkapply_total metric

Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
Julien Pivotto
2017-02-12 21:24:58 +01:00
parent bdb8368e89
commit e8855f7621
6 changed files with 113 additions and 22 deletions

View File

@@ -438,9 +438,10 @@ func (obj *Main) Run() error {
newGraph.Flags = pgraph.Flags{Debug: obj.Flags.Debug}
// pass in the information we need
newGraph.AssociateData(&resources.Data{
Converger: converger,
Prefix: pgraphPrefix,
Debug: obj.Flags.Debug,
Converger: converger,
Prometheus: prom,
Prefix: pgraphPrefix,
Debug: obj.Flags.Debug,
})
// apply the global noop parameter if requested

View File

@@ -224,6 +224,12 @@ func (g *Graph) Process(v *Vertex) error {
// if this fails, don't UpdateTimestamp()
checkOK, err = obj.CheckApply(!noop)
// Record this CheckApply run in the prometheus counter, when a prometheus
// instance is associated with the resource.
if prom := obj.Prometheus(); prom != nil {
	if promErr := prom.UpdateCheckApplyTotal(obj.Kind(), !noop, !checkOK, err != nil); promErr != nil {
		// TODO: how to error correctly
		// Log promErr (the metric update failure), not err, which is
		// the result of CheckApply and may well be nil here.
		log.Printf("%s[%s]: Prometheus.UpdateCheckApplyTotal() errored: %v", v.Kind(), v.GetName(), promErr)
	}
}
// TODO: Can the `Poll` converged timeout tracking be a
// more general method for all converged timeouts? this
// would simplify the resources by removing boilerplate

View File

@@ -21,7 +21,9 @@ package prometheus
import (
	"errors"
	"net/http"
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
@@ -33,6 +35,9 @@ const DefaultPrometheusListen = "127.0.0.1:9233"
// prometheus instance. Run Init() on it.
type Prometheus struct {
	// Listen is the listen specification for the net/http server. Init()
	// replaces an empty value with DefaultPrometheusListen.
	Listen string

	// checkApplyTotal is the total of CheckApplies that have been triggered,
	// labelled by kind, apply, eventful and errorful. It is created and
	// registered by Init().
	checkApplyTotal *prometheus.CounterVec
}
// Init sets default parameters (currently the Listen address) and creates and
// registers the mgmt_checkapply_total counter.
@@ -40,6 +45,20 @@ func (obj *Prometheus) Init() error {
// Fall back to the default listen address when none was specified.
if len(obj.Listen) == 0 {
	obj.Listen = DefaultPrometheusListen
}

// Build the counter in a local variable so that obj is only modified once
// the registration has succeeded.
checkApplyTotal := prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "mgmt_checkapply_total",
		Help: "Number of CheckApply that have run.",
	},
	// Labels for this metric.
	// kind: resource type: Svc, File, ...
	// apply: if the CheckApply happened in "apply" mode
	// eventful: did the CheckApply generate an event
	// errorful: did the CheckApply generate an error
	[]string{"kind", "apply", "eventful", "errorful"},
)
// Use Register rather than MustRegister: Init returns an error, so a failed
// registration (for example the metric already being registered by an earlier
// Init) is reported to the caller instead of causing a panic.
if err := prometheus.Register(checkApplyTotal); err != nil {
	return err
}
obj.checkApplyTotal = checkApplyTotal

return nil
}
@@ -57,3 +76,12 @@ func (obj *Prometheus) Stop() error {
// https://stackoverflow.com/questions/39320025/go-how-to-stop-http-listenandserve/41433555#41433555
return nil
}
// UpdateCheckApplyTotal increments the mgmt_checkapply_total counter for one
// CheckApply run. The kind, apply, eventful and errorful arguments become the
// values of the labels of the same names; the booleans are formatted as "true"
// or "false". A nil receiver is a no-op that returns nil. An error is returned
// if the counter has not been created yet, which is what Init() does.
func (obj *Prometheus) UpdateCheckApplyTotal(kind string, apply, eventful, errorful bool) error {
	if obj == nil {
		return nil // no prometheus instance, so there is nothing to record
	}
	if obj.checkApplyTotal == nil {
		// Without this guard the With() call below would dereference a
		// nil *CounterVec and panic.
		return errors.New("prometheus: checkApplyTotal is not initialized, run Init() first")
	}

	labels := prometheus.Labels{
		"kind":     kind,
		"apply":    strconv.FormatBool(apply),
		"eventful": strconv.FormatBool(eventful),
		"errorful": strconv.FormatBool(errorful),
	}
	obj.checkApplyTotal.With(labels).Inc()
	return nil
}

View File

@@ -33,6 +33,7 @@ import (
// TODO: should each resource be a sub-package?
"github.com/purpleidea/mgmt/converger"
"github.com/purpleidea/mgmt/event"
"github.com/purpleidea/mgmt/prometheus"
errwrap "github.com/pkg/errors"
"golang.org/x/time/rate"
@@ -57,9 +58,10 @@ const refreshPathToken = "refresh"
type Data struct {
//Hostname string // uuid for the host
//Noop bool
Converger converger.Converger
Prefix string // the prefix to be used for the pgraph namespace
Debug bool
Converger converger.Converger
Prometheus *prometheus.Prometheus
Prefix string // the prefix to be used for the pgraph namespace
Debug bool
// NOTE: we can add more fields here if needed for the resources.
}
@@ -164,6 +166,7 @@ type Base interface {
Started() <-chan struct{} // returns when the resource has started
Starter(bool)
Poll(chan *event.Event) error // poll alternative to watching :(
Prometheus() *prometheus.Prometheus
}
// Res is the minimum interface you need to implement to define a new resource.
@@ -188,22 +191,23 @@ type BaseRes struct {
MetaParams MetaParams `yaml:"meta"` // struct of all the metaparams
Recv map[string]*Send // mapping of key to receive on from value
kind string
mutex *sync.Mutex // locks around sending and closing of events channel
events chan *event.Event
converger converger.Converger // converged tracking
cuid converger.ConvergerUID
prefix string // base prefix for this resource
debug bool
state ResState
working bool // is the Worker() loop running ?
started chan struct{} // closed when worker is started/running
isStarted bool // did the started chan already close?
starter bool // does this have indegree == 0 ? XXX: usually?
isStateOK bool // whether the state is okay based on events or not
isGrouped bool // am i contained within a group?
grouped []Res // list of any grouped resources
refresh bool // does this resource have a refresh to run?
kind string
mutex *sync.Mutex // locks around sending and closing of events channel
events chan *event.Event
converger converger.Converger // converged tracking
cuid converger.ConvergerUID
prometheus *prometheus.Prometheus
prefix string // base prefix for this resource
debug bool
state ResState
working bool // is the Worker() loop running ?
started chan struct{} // closed when worker is started/running
isStarted bool // did the started chan already close?
starter bool // does this have indegree == 0 ? XXX: usually?
isStateOK bool // whether the state is okay based on events or not
isGrouped bool // am i contained within a group?
grouped []Res // list of any grouped resources
refresh bool // does this resource have a refresh to run?
//refreshState StatefulBool // TODO: future stateful bool
}
@@ -348,6 +352,7 @@ func (obj *BaseRes) Events() chan *event.Event {
// AssociateData copies the shared values carried by data (the converger, the
// prometheus instance, the prefix and the debug flag) onto this resource.
func (obj *BaseRes) AssociateData(data *Data) {
	obj.converger, obj.prometheus = data.Converger, data.Prometheus
	obj.prefix, obj.debug = data.Prefix, data.Debug
}
@@ -561,6 +566,11 @@ func (obj *BaseRes) Poll(processChan chan *event.Event) error {
}
}
// Prometheus hands back the prometheus instance that AssociateData stored on
// this resource. The result is nil when no instance was stored.
func (obj *BaseRes) Prometheus() *prometheus.Prometheus {
	instance := obj.prometheus
	return instance
}
// ResToB64 encodes a resource to a base64 encoded string (after serialization)
func ResToB64(res Res) (string, error) {
b := bytes.Buffer{}

20
test/shell/prometheus-3.sh Executable file
View File

@@ -0,0 +1,20 @@
#!/bin/bash -e

# run the graph described by prometheus-3.yaml, with prometheus support
timeout --kill-after=20s 15s ./mgmt run --tmp-prefix --prometheus --yaml prometheus-3.yaml &
pid=$!

# Because of `-e`, a failing check below aborts the script right away. Make sure
# mgmt still gets interrupted in that case instead of being left running in the
# background. On the normal path mgmt is already gone and this is a no-op.
cleanup() {
	killall -SIGINT mgmt 2>/dev/null || true # send ^C to exit mgmt
}
trap cleanup EXIT

sleep 10s # let it converge

# Fetch the metrics once, so that every check looks at the same counter values.
metrics="$(curl --silent --show-error 127.0.0.1:9233/metrics)"

# For test debugging purpose
printf '%s\n' "$metrics"

# Three CheckApply for a File ; with events
printf '%s\n' "$metrics" | grep '^mgmt_checkapply_total{apply="true",errorful="false",eventful="true",kind="File"} 3$'

# One CheckApply for a File ; in noop mode.
printf '%s\n' "$metrics" | grep '^mgmt_checkapply_total{apply="false",errorful="false",eventful="true",kind="File"} 1$'

killall -SIGINT mgmt # send ^C to exit mgmt
wait $pid # get exit status
exit $?

View File

@@ -0,0 +1,26 @@
---
graph: mygraph
resources:
file:
- name: file1
path: "/tmp/mgmt/f1"
content: |
i am f1
state: exists
- name: file2
path: "/tmp/mgmt/f2"
content: |
i am f2
state: exists
- name: file3
path: "/tmp/mgmt/f3"
content: |
i am f3
state: exists
- name: file4
path: "/tmp/mgmt/f4"
content: |
i am f4
state: exists
meta:
noop: true