prometheus: Add mgmt_graph_start_time_seconds metric
Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
@@ -31,6 +31,7 @@ Here is a list of the metrics we provide:
|
|||||||
- `mgmt_checkapply_total`: The number of CheckApply's that mgmt has run
|
- `mgmt_checkapply_total`: The number of CheckApply's that mgmt has run
|
||||||
- `mgmt_failures_total`: The number of resources that have failed
|
- `mgmt_failures_total`: The number of resources that have failed
|
||||||
- `mgmt_failures_current`: The number of resources that have failed
|
- `mgmt_failures_current`: The number of resources that have failed
|
||||||
|
- `mgmt_graph_start_time_seconds`: Start time of the current graph since unix epoch in seconds
|
||||||
|
|
||||||
For each metric, you will get some extra labels:
|
For each metric, you will get some extra labels:
|
||||||
|
|
||||||
|
|||||||
@@ -480,6 +480,12 @@ func (obj *Main) Run() error {
|
|||||||
log.Printf("Graphviz: Successfully generated graph!")
|
log.Printf("Graphviz: Successfully generated graph!")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Call this here because at this point the graph does not
|
||||||
|
// know anything about the prometheus instance.
|
||||||
|
if err := prom.UpdatePgraphStartTime(); err != nil {
|
||||||
|
log.Printf("Main: Prometheus.UpdatePgraphStartTime() errored: %v", err)
|
||||||
|
}
|
||||||
// G.Start(...) needs to be synchronous or wait,
|
// G.Start(...) needs to be synchronous or wait,
|
||||||
// because if half of the nodes are started and
|
// because if half of the nodes are started and
|
||||||
// some are not ready yet and the EtcdWatch
|
// some are not ready yet and the EtcdWatch
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/purpleidea/mgmt/event"
|
"github.com/purpleidea/mgmt/event"
|
||||||
|
"github.com/purpleidea/mgmt/prometheus"
|
||||||
"github.com/purpleidea/mgmt/resources"
|
"github.com/purpleidea/mgmt/resources"
|
||||||
|
|
||||||
errwrap "github.com/pkg/errors"
|
errwrap "github.com/pkg/errors"
|
||||||
@@ -58,6 +59,8 @@ type Graph struct {
|
|||||||
state graphState
|
state graphState
|
||||||
mutex *sync.Mutex // used when modifying graph State variable
|
mutex *sync.Mutex // used when modifying graph State variable
|
||||||
wg *sync.WaitGroup
|
wg *sync.WaitGroup
|
||||||
|
|
||||||
|
prometheus *prometheus.Prometheus // the prometheus instance
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vertex is the primary vertex struct in this library.
|
// Vertex is the primary vertex struct in this library.
|
||||||
@@ -119,6 +122,8 @@ func (g *Graph) Copy() *Graph {
|
|||||||
state: g.state,
|
state: g.state,
|
||||||
mutex: g.mutex,
|
mutex: g.mutex,
|
||||||
wg: g.wg,
|
wg: g.wg,
|
||||||
|
|
||||||
|
prometheus: g.prometheus,
|
||||||
}
|
}
|
||||||
for k, v := range g.Adjacency {
|
for k, v := range g.Adjacency {
|
||||||
newGraph.Adjacency[k] = v // copy
|
newGraph.Adjacency[k] = v // copy
|
||||||
@@ -645,6 +650,9 @@ func (g *Graph) GraphMetas() []*resources.MetaParams {
|
|||||||
|
|
||||||
// AssociateData associates some data with the object in the graph in question.
|
// AssociateData associates some data with the object in the graph in question.
|
||||||
func (g *Graph) AssociateData(data *resources.Data) {
|
func (g *Graph) AssociateData(data *resources.Data) {
|
||||||
|
// prometheus needs to be associated to this graph as well
|
||||||
|
g.prometheus = data.Prometheus
|
||||||
|
|
||||||
for k := range g.Adjacency {
|
for k := range g.Adjacency {
|
||||||
k.Res.AssociateData(data)
|
k.Res.AssociateData(data)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,7 +36,8 @@ const DefaultPrometheusListen = "127.0.0.1:9233"
|
|||||||
type Prometheus struct {
|
type Prometheus struct {
|
||||||
Listen string // the listen specification for the net/http server
|
Listen string // the listen specification for the net/http server
|
||||||
|
|
||||||
checkApplyTotal *prometheus.CounterVec // total of CheckApplies that have been triggered
|
checkApplyTotal *prometheus.CounterVec // total of CheckApplies that have been triggered
|
||||||
|
pgraphStartTimeSeconds prometheus.Gauge // start time of the current graph in seconds since unix epoch
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,6 +60,14 @@ func (obj *Prometheus) Init() error {
|
|||||||
)
|
)
|
||||||
prometheus.MustRegister(obj.checkApplyTotal)
|
prometheus.MustRegister(obj.checkApplyTotal)
|
||||||
|
|
||||||
|
obj.pgraphStartTimeSeconds = prometheus.NewGauge(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Name: "mgmt_graph_start_time_seconds",
|
||||||
|
Help: "Start time of the current graph since unix epoch in seconds.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
prometheus.MustRegister(obj.pgraphStartTimeSeconds)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -88,3 +97,13 @@ func (obj *Prometheus) UpdateCheckApplyTotal(kind string, apply, eventful, error
|
|||||||
metric.Inc()
|
metric.Inc()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdatePgraphStartTime updates the mgmt_graph_start_time_seconds metric
|
||||||
|
// to the current timestamp.
|
||||||
|
func (obj *Prometheus) UpdatePgraphStartTime() error {
|
||||||
|
if obj == nil {
|
||||||
|
return nil // happens when mgmt is launched without --prometheus
|
||||||
|
}
|
||||||
|
obj.pgraphStartTimeSeconds.SetToCurrentTime()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -14,6 +14,9 @@ curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="true",errorful
|
|||||||
# One CheckApply for a File ; in noop mode.
|
# One CheckApply for a File ; in noop mode.
|
||||||
curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="false",errorful="false",eventful="true",kind="File"} 1$'
|
curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="false",errorful="false",eventful="true",kind="File"} 1$'
|
||||||
|
|
||||||
|
# Check mgmt_graph_start_time_seconds
|
||||||
|
curl 127.0.0.1:9233/metrics | grep "^mgmt_graph_start_time_seconds [1-9]\+"
|
||||||
|
|
||||||
killall -SIGINT mgmt # send ^C to exit mgmt
|
killall -SIGINT mgmt # send ^C to exit mgmt
|
||||||
wait $pid # get exit status
|
wait $pid # get exit status
|
||||||
exit $?
|
exit $?
|
||||||
|
|||||||
Reference in New Issue
Block a user