prometheus: Implement mgmt_checkapply_total metric
Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
@@ -439,6 +439,7 @@ func (obj *Main) Run() error {
|
|||||||
// pass in the information we need
|
// pass in the information we need
|
||||||
newGraph.AssociateData(&resources.Data{
|
newGraph.AssociateData(&resources.Data{
|
||||||
Converger: converger,
|
Converger: converger,
|
||||||
|
Prometheus: prom,
|
||||||
Prefix: pgraphPrefix,
|
Prefix: pgraphPrefix,
|
||||||
Debug: obj.Flags.Debug,
|
Debug: obj.Flags.Debug,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -224,6 +224,12 @@ func (g *Graph) Process(v *Vertex) error {
|
|||||||
// if this fails, don't UpdateTimestamp()
|
// if this fails, don't UpdateTimestamp()
|
||||||
checkOK, err = obj.CheckApply(!noop)
|
checkOK, err = obj.CheckApply(!noop)
|
||||||
|
|
||||||
|
if obj.Prometheus() != nil {
|
||||||
|
if promErr := obj.Prometheus().UpdateCheckApplyTotal(obj.Kind(), !noop, !checkOK, err != nil); promErr != nil {
|
||||||
|
// TODO: how to error correctly
|
||||||
|
log.Printf("%s[%s]: Prometheus.UpdateCheckApplyTotal() errored: %v", v.Kind(), v.GetName(), promErr) // report the metrics error, not CheckApply's err
|
||||||
|
}
|
||||||
|
}
|
||||||
// TODO: Can the `Poll` converged timeout tracking be a
|
// TODO: Can the `Poll` converged timeout tracking be a
|
||||||
// more general method for all converged timeouts? this
|
// more general method for all converged timeouts? this
|
||||||
// would simplify the resources by removing boilerplate
|
// would simplify the resources by removing boilerplate
|
||||||
|
|||||||
@@ -21,7 +21,9 @@ package prometheus
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -33,6 +35,9 @@ const DefaultPrometheusListen = "127.0.0.1:9233"
|
|||||||
// prometheus instance. Run Init() on it.
|
// prometheus instance. Run Init() on it.
|
||||||
type Prometheus struct {
|
type Prometheus struct {
|
||||||
Listen string // the listen specification for the net/http server
|
Listen string // the listen specification for the net/http server
|
||||||
|
|
||||||
|
checkApplyTotal *prometheus.CounterVec // total of CheckApplies that have been triggered
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init some parameters - currently the Listen address.
|
// Init some parameters - currently the Listen address.
|
||||||
@@ -40,6 +45,20 @@ func (obj *Prometheus) Init() error {
|
|||||||
if len(obj.Listen) == 0 {
|
if len(obj.Listen) == 0 {
|
||||||
obj.Listen = DefaultPrometheusListen
|
obj.Listen = DefaultPrometheusListen
|
||||||
}
|
}
|
||||||
|
obj.checkApplyTotal = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "mgmt_checkapply_total",
|
||||||
|
Help: "Number of CheckApply that have run.",
|
||||||
|
},
|
||||||
|
// Labels for this metric.
|
||||||
|
// kind: resource type: Svc, File, ...
|
||||||
|
// apply: if the CheckApply happened in "apply" mode
|
||||||
|
// eventful: did the CheckApply generate an event
|
||||||
|
// errorful: did the CheckApply generate an error
|
||||||
|
[]string{"kind", "apply", "eventful", "errorful"},
|
||||||
|
)
|
||||||
|
prometheus.MustRegister(obj.checkApplyTotal)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -57,3 +76,12 @@ func (obj *Prometheus) Stop() error {
|
|||||||
// https://stackoverflow.com/questions/39320025/go-how-to-stop-http-listenandserve/41433555#41433555
|
// https://stackoverflow.com/questions/39320025/go-how-to-stop-http-listenandserve/41433555#41433555
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdateCheckApplyTotal increments the mgmt_checkapply_total counter for the
|
||||||
|
// given kind, apply, eventful and errorful label values. It always returns nil.
|
||||||
|
func (obj *Prometheus) UpdateCheckApplyTotal(kind string, apply, eventful, errorful bool) error {
|
||||||
|
labels := prometheus.Labels{"kind": kind, "apply": strconv.FormatBool(apply), "eventful": strconv.FormatBool(eventful), "errorful": strconv.FormatBool(errorful)}
|
||||||
|
metric := obj.checkApplyTotal.With(labels)
|
||||||
|
metric.Inc()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ import (
|
|||||||
// TODO: should each resource be a sub-package?
|
// TODO: should each resource be a sub-package?
|
||||||
"github.com/purpleidea/mgmt/converger"
|
"github.com/purpleidea/mgmt/converger"
|
||||||
"github.com/purpleidea/mgmt/event"
|
"github.com/purpleidea/mgmt/event"
|
||||||
|
"github.com/purpleidea/mgmt/prometheus"
|
||||||
|
|
||||||
errwrap "github.com/pkg/errors"
|
errwrap "github.com/pkg/errors"
|
||||||
"golang.org/x/time/rate"
|
"golang.org/x/time/rate"
|
||||||
@@ -58,6 +59,7 @@ type Data struct {
|
|||||||
//Hostname string // uuid for the host
|
//Hostname string // uuid for the host
|
||||||
//Noop bool
|
//Noop bool
|
||||||
Converger converger.Converger
|
Converger converger.Converger
|
||||||
|
Prometheus *prometheus.Prometheus
|
||||||
Prefix string // the prefix to be used for the pgraph namespace
|
Prefix string // the prefix to be used for the pgraph namespace
|
||||||
Debug bool
|
Debug bool
|
||||||
// NOTE: we can add more fields here if needed for the resources.
|
// NOTE: we can add more fields here if needed for the resources.
|
||||||
@@ -164,6 +166,7 @@ type Base interface {
|
|||||||
Started() <-chan struct{} // returns when the resource has started
|
Started() <-chan struct{} // returns when the resource has started
|
||||||
Starter(bool)
|
Starter(bool)
|
||||||
Poll(chan *event.Event) error // poll alternative to watching :(
|
Poll(chan *event.Event) error // poll alternative to watching :(
|
||||||
|
Prometheus() *prometheus.Prometheus
|
||||||
}
|
}
|
||||||
|
|
||||||
// Res is the minimum interface you need to implement to define a new resource.
|
// Res is the minimum interface you need to implement to define a new resource.
|
||||||
@@ -193,6 +196,7 @@ type BaseRes struct {
|
|||||||
events chan *event.Event
|
events chan *event.Event
|
||||||
converger converger.Converger // converged tracking
|
converger converger.Converger // converged tracking
|
||||||
cuid converger.ConvergerUID
|
cuid converger.ConvergerUID
|
||||||
|
prometheus *prometheus.Prometheus
|
||||||
prefix string // base prefix for this resource
|
prefix string // base prefix for this resource
|
||||||
debug bool
|
debug bool
|
||||||
state ResState
|
state ResState
|
||||||
@@ -348,6 +352,7 @@ func (obj *BaseRes) Events() chan *event.Event {
|
|||||||
// AssociateData associates some data with the object in question.
|
// AssociateData associates some data with the object in question.
|
||||||
func (obj *BaseRes) AssociateData(data *Data) {
|
func (obj *BaseRes) AssociateData(data *Data) {
|
||||||
obj.converger = data.Converger
|
obj.converger = data.Converger
|
||||||
|
obj.prometheus = data.Prometheus
|
||||||
obj.prefix = data.Prefix
|
obj.prefix = data.Prefix
|
||||||
obj.debug = data.Debug
|
obj.debug = data.Debug
|
||||||
}
|
}
|
||||||
@@ -561,6 +566,11 @@ func (obj *BaseRes) Poll(processChan chan *event.Event) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prometheus returns the prometheus instance that was stored on this resource
// by AssociateData. Callers check the result against nil before using it.
|
||||||
|
func (obj *BaseRes) Prometheus() *prometheus.Prometheus {
|
||||||
|
return obj.prometheus
|
||||||
|
}
|
||||||
|
|
||||||
// ResToB64 encodes a resource to a base64 encoded string (after serialization)
|
// ResToB64 encodes a resource to a base64 encoded string (after serialization)
|
||||||
func ResToB64(res Res) (string, error) {
|
func ResToB64(res Res) (string, error) {
|
||||||
b := bytes.Buffer{}
|
b := bytes.Buffer{}
|
||||||
|
|||||||
20
test/shell/prometheus-3.sh
Executable file
20
test/shell/prometheus-3.sh
Executable file
@@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash -e
|
||||||
|
|
||||||
|
# run a graph of four file resources (one in noop mode), with prometheus support
|
||||||
|
timeout --kill-after=20s 15s ./mgmt run --tmp-prefix --prometheus --yaml prometheus-3.yaml &
|
||||||
|
pid=$!
|
||||||
|
sleep 10s # let it converge
|
||||||
|
|
||||||
|
# For test debugging purpose
|
||||||
|
curl 127.0.0.1:9233/metrics
|
||||||
|
|
||||||
|
# Three CheckApply for a File ; with events
|
||||||
|
curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="true",errorful="false",eventful="true",kind="File"} 3$'
|
||||||
|
|
||||||
|
# One CheckApply for a File ; in noop mode.
|
||||||
|
curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="false",errorful="false",eventful="true",kind="File"} 1$'
|
||||||
|
|
||||||
|
|
||||||
|
killall -SIGINT mgmt # send ^C to exit mgmt
|
||||||
|
wait $pid # get exit status
|
||||||
|
exit $?
|
||||||
26
test/shell/prometheus-3.yaml
Normal file
26
test/shell/prometheus-3.yaml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
---
|
||||||
|
graph: mygraph
|
||||||
|
resources:
|
||||||
|
file:
|
||||||
|
- name: file1
|
||||||
|
path: "/tmp/mgmt/f1"
|
||||||
|
content: |
|
||||||
|
i am f1
|
||||||
|
state: exists
|
||||||
|
- name: file2
|
||||||
|
path: "/tmp/mgmt/f2"
|
||||||
|
content: |
|
||||||
|
i am f2
|
||||||
|
state: exists
|
||||||
|
- name: file3
|
||||||
|
path: "/tmp/mgmt/f3"
|
||||||
|
content: |
|
||||||
|
i am f3
|
||||||
|
state: exists
|
||||||
|
- name: file4
|
||||||
|
path: "/tmp/mgmt/f4"
|
||||||
|
content: |
|
||||||
|
i am f4
|
||||||
|
state: exists
|
||||||
|
meta:
|
||||||
|
noop: true
|
||||||
Reference in New Issue
Block a user