prometheus: Implement mgmt_checkapply_total metric
Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
@@ -438,9 +438,10 @@ func (obj *Main) Run() error {
|
||||
newGraph.Flags = pgraph.Flags{Debug: obj.Flags.Debug}
|
||||
// pass in the information we need
|
||||
newGraph.AssociateData(&resources.Data{
|
||||
Converger: converger,
|
||||
Prefix: pgraphPrefix,
|
||||
Debug: obj.Flags.Debug,
|
||||
Converger: converger,
|
||||
Prometheus: prom,
|
||||
Prefix: pgraphPrefix,
|
||||
Debug: obj.Flags.Debug,
|
||||
})
|
||||
|
||||
// apply the global noop parameter if requested
|
||||
|
||||
@@ -224,6 +224,12 @@ func (g *Graph) Process(v *Vertex) error {
|
||||
// if this fails, don't UpdateTimestamp()
|
||||
checkOK, err = obj.CheckApply(!noop)
|
||||
|
||||
if obj.Prometheus() != nil {
|
||||
if promErr := obj.Prometheus().UpdateCheckApplyTotal(obj.Kind(), !noop, !checkOK, err != nil); promErr != nil {
|
||||
// TODO: how to error correctly
|
||||
// report the metrics error itself (promErr), not the CheckApply result in err
log.Printf("%s[%s]: Prometheus.UpdateCheckApplyTotal() errored: %v", v.Kind(), v.GetName(), promErr)
|
||||
}
|
||||
}
|
||||
// TODO: Can the `Poll` converged timeout tracking be a
|
||||
// more general method for all converged timeouts? this
|
||||
// would simplify the resources by removing boilerplate
|
||||
|
||||
@@ -21,7 +21,9 @@ package prometheus
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
@@ -33,6 +35,9 @@ const DefaultPrometheusListen = "127.0.0.1:9233"
|
||||
// prometheus instance. Run Init() on it.
|
||||
type Prometheus struct {
|
||||
Listen string // the listen specification for the net/http server
|
||||
|
||||
checkApplyTotal *prometheus.CounterVec // total of CheckApplies that have been triggered
|
||||
|
||||
}
|
||||
|
||||
// Init some parameters - currently the Listen address.
|
||||
@@ -40,6 +45,20 @@ func (obj *Prometheus) Init() error {
|
||||
if len(obj.Listen) == 0 {
|
||||
obj.Listen = DefaultPrometheusListen
|
||||
}
|
||||
obj.checkApplyTotal = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "mgmt_checkapply_total",
|
||||
Help: "Number of CheckApply that have run.",
|
||||
},
|
||||
// Labels for this metric.
|
||||
// kind: resource type: Svc, File, ...
|
||||
// apply: if the CheckApply happened in "apply" mode
|
||||
// eventful: did the CheckApply generate an event
|
||||
// errorful: did the CheckApply generate an error
|
||||
[]string{"kind", "apply", "eventful", "errorful"},
|
||||
)
|
||||
prometheus.MustRegister(obj.checkApplyTotal)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -57,3 +76,12 @@ func (obj *Prometheus) Stop() error {
|
||||
// https://stackoverflow.com/questions/39320025/go-how-to-stop-http-listenandserve/41433555#41433555
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateCheckApplyTotal refreshes the failing gauge by parsing the internal
|
||||
// state map.
|
||||
func (obj *Prometheus) UpdateCheckApplyTotal(kind string, apply, eventful, errorful bool) error {
|
||||
labels := prometheus.Labels{"kind": kind, "apply": strconv.FormatBool(apply), "eventful": strconv.FormatBool(eventful), "errorful": strconv.FormatBool(errorful)}
|
||||
metric := obj.checkApplyTotal.With(labels)
|
||||
metric.Inc()
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ import (
|
||||
// TODO: should each resource be a sub-package?
|
||||
"github.com/purpleidea/mgmt/converger"
|
||||
"github.com/purpleidea/mgmt/event"
|
||||
"github.com/purpleidea/mgmt/prometheus"
|
||||
|
||||
errwrap "github.com/pkg/errors"
|
||||
"golang.org/x/time/rate"
|
||||
@@ -57,9 +58,10 @@ const refreshPathToken = "refresh"
|
||||
type Data struct {
|
||||
//Hostname string // uuid for the host
|
||||
//Noop bool
|
||||
Converger converger.Converger
|
||||
Prefix string // the prefix to be used for the pgraph namespace
|
||||
Debug bool
|
||||
Converger converger.Converger
|
||||
Prometheus *prometheus.Prometheus
|
||||
Prefix string // the prefix to be used for the pgraph namespace
|
||||
Debug bool
|
||||
// NOTE: we can add more fields here if needed for the resources.
|
||||
}
|
||||
|
||||
@@ -164,6 +166,7 @@ type Base interface {
|
||||
Started() <-chan struct{} // returns when the resource has started
|
||||
Starter(bool)
|
||||
Poll(chan *event.Event) error // poll alternative to watching :(
|
||||
Prometheus() *prometheus.Prometheus
|
||||
}
|
||||
|
||||
// Res is the minimum interface you need to implement to define a new resource.
|
||||
@@ -188,22 +191,23 @@ type BaseRes struct {
|
||||
MetaParams MetaParams `yaml:"meta"` // struct of all the metaparams
|
||||
Recv map[string]*Send // mapping of key to receive on from value
|
||||
|
||||
kind string
|
||||
mutex *sync.Mutex // locks around sending and closing of events channel
|
||||
events chan *event.Event
|
||||
converger converger.Converger // converged tracking
|
||||
cuid converger.ConvergerUID
|
||||
prefix string // base prefix for this resource
|
||||
debug bool
|
||||
state ResState
|
||||
working bool // is the Worker() loop running ?
|
||||
started chan struct{} // closed when worker is started/running
|
||||
isStarted bool // did the started chan already close?
|
||||
starter bool // does this have indegree == 0 ? XXX: usually?
|
||||
isStateOK bool // whether the state is okay based on events or not
|
||||
isGrouped bool // am i contained within a group?
|
||||
grouped []Res // list of any grouped resources
|
||||
refresh bool // does this resource have a refresh to run?
|
||||
kind string
|
||||
mutex *sync.Mutex // locks around sending and closing of events channel
|
||||
events chan *event.Event
|
||||
converger converger.Converger // converged tracking
|
||||
cuid converger.ConvergerUID
|
||||
prometheus *prometheus.Prometheus
|
||||
prefix string // base prefix for this resource
|
||||
debug bool
|
||||
state ResState
|
||||
working bool // is the Worker() loop running ?
|
||||
started chan struct{} // closed when worker is started/running
|
||||
isStarted bool // did the started chan already close?
|
||||
starter bool // does this have indegree == 0 ? XXX: usually?
|
||||
isStateOK bool // whether the state is okay based on events or not
|
||||
isGrouped bool // am i contained within a group?
|
||||
grouped []Res // list of any grouped resources
|
||||
refresh bool // does this resource have a refresh to run?
|
||||
//refreshState StatefulBool // TODO: future stateful bool
|
||||
}
|
||||
|
||||
@@ -348,6 +352,7 @@ func (obj *BaseRes) Events() chan *event.Event {
|
||||
// AssociateData associates some data with the object in question.
|
||||
func (obj *BaseRes) AssociateData(data *Data) {
|
||||
obj.converger = data.Converger
|
||||
obj.prometheus = data.Prometheus
|
||||
obj.prefix = data.Prefix
|
||||
obj.debug = data.Debug
|
||||
}
|
||||
@@ -561,6 +566,11 @@ func (obj *BaseRes) Poll(processChan chan *event.Event) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Prometheus returns the prometheus instance that AssociateData stored on this
// resource. It returns nil when no instance was provided, so callers should
// check for nil before using it.
|
||||
func (obj *BaseRes) Prometheus() *prometheus.Prometheus {
|
||||
return obj.prometheus
|
||||
}
|
||||
|
||||
// ResToB64 encodes a resource to a base64 encoded string (after serialization)
|
||||
func ResToB64(res Res) (string, error) {
|
||||
b := bytes.Buffer{}
|
||||
|
||||
20
test/shell/prometheus-3.sh
Executable file
20
test/shell/prometheus-3.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
# run a graph of four file resources (one of them in noop mode), with prometheus support
|
||||
timeout --kill-after=20s 15s ./mgmt run --tmp-prefix --prometheus --yaml prometheus-3.yaml &
|
||||
pid=$!
|
||||
sleep 10s # let it converge
|
||||
|
||||
# For test debugging purpose
|
||||
curl 127.0.0.1:9233/metrics
|
||||
|
||||
# Three CheckApply for a File ; with events
|
||||
curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="true",errorful="false",eventful="true",kind="File"} 3$'
|
||||
|
||||
# One CheckApply for a File ; in noop mode.
|
||||
curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="false",errorful="false",eventful="true",kind="File"} 1$'
|
||||
|
||||
|
||||
killall -SIGINT mgmt # send ^C to exit mgmt
|
||||
wait $pid # get exit status
|
||||
exit $?
|
||||
26
test/shell/prometheus-3.yaml
Normal file
26
test/shell/prometheus-3.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
---
|
||||
graph: mygraph
|
||||
resources:
|
||||
file:
|
||||
- name: file1
|
||||
path: "/tmp/mgmt/f1"
|
||||
content: |
|
||||
i am f1
|
||||
state: exists
|
||||
- name: file2
|
||||
path: "/tmp/mgmt/f2"
|
||||
content: |
|
||||
i am f2
|
||||
state: exists
|
||||
- name: file3
|
||||
path: "/tmp/mgmt/f3"
|
||||
content: |
|
||||
i am f3
|
||||
state: exists
|
||||
- name: file4
|
||||
path: "/tmp/mgmt/f4"
|
||||
content: |
|
||||
i am f4
|
||||
state: exists
|
||||
meta:
|
||||
noop: true
|
||||
Reference in New Issue
Block a user