diff --git a/docs/prometheus.md b/docs/prometheus.md index 3c3b90dd..f91872d1 100644 --- a/docs/prometheus.md +++ b/docs/prometheus.md @@ -31,6 +31,7 @@ Here is a list of the metrics we provide: - `mgmt_checkapply_total`: The number of CheckApply's that mgmt has run - `mgmt_failures_total`: The number of resources that have failed - `mgmt_failures_current`: The number of resources that have failed +- `mgmt_graph_start_time_seconds`: Start time of the current graph since unix epoch in seconds For each metric, you will get some extra labels: diff --git a/lib/main.go b/lib/main.go index 61fd9f38..f216534a 100644 --- a/lib/main.go +++ b/lib/main.go @@ -480,6 +480,12 @@ func (obj *Main) Run() error { log.Printf("Graphviz: Successfully generated graph!") } } + + // Call this here because at this point the graph does not + // know anything about the prometheus instance. + if err := prom.UpdatePgraphStartTime(); err != nil { + log.Printf("Main: Prometheus.UpdatePgraphStartTime() errored: %v", err) + } // G.Start(...) needs to be synchronous or wait, // because if half of the nodes are started and // some are not ready yet and the EtcdWatch diff --git a/pgraph/pgraph.go b/pgraph/pgraph.go index e8778735..da8d4230 100644 --- a/pgraph/pgraph.go +++ b/pgraph/pgraph.go @@ -24,6 +24,7 @@ import ( "sync" "github.com/purpleidea/mgmt/event" + "github.com/purpleidea/mgmt/prometheus" "github.com/purpleidea/mgmt/resources" errwrap "github.com/pkg/errors" @@ -58,6 +59,8 @@ type Graph struct { state graphState mutex *sync.Mutex // used when modifying graph State variable wg *sync.WaitGroup + + prometheus *prometheus.Prometheus // the prometheus instance } // Vertex is the primary vertex struct in this library. 
@@ -119,6 +122,8 @@ func (g *Graph) Copy() *Graph { state: g.state, mutex: g.mutex, wg: g.wg, + + prometheus: g.prometheus, } for k, v := range g.Adjacency { newGraph.Adjacency[k] = v // copy @@ -645,6 +650,9 @@ func (g *Graph) GraphMetas() []*resources.MetaParams { // AssociateData associates some data with the object in the graph in question. func (g *Graph) AssociateData(data *resources.Data) { + // prometheus needs to be associated to this graph as well + g.prometheus = data.Prometheus + for k := range g.Adjacency { k.Res.AssociateData(data) } diff --git a/prometheus/prometheus.go b/prometheus/prometheus.go index d3c9c25a..34e5cf0e 100644 --- a/prometheus/prometheus.go +++ b/prometheus/prometheus.go @@ -36,7 +36,8 @@ const DefaultPrometheusListen = "127.0.0.1:9233" type Prometheus struct { Listen string // the listen specification for the net/http server - checkApplyTotal *prometheus.CounterVec // total of CheckApplies that have been triggered + checkApplyTotal *prometheus.CounterVec // total of CheckApplies that have been triggered + pgraphStartTimeSeconds prometheus.Gauge // start time of the current graph in seconds since unix epoch } @@ -59,6 +60,14 @@ func (obj *Prometheus) Init() error { ) prometheus.MustRegister(obj.checkApplyTotal) + obj.pgraphStartTimeSeconds = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "mgmt_graph_start_time_seconds", + Help: "Start time of the current graph since unix epoch in seconds.", + }, + ) + prometheus.MustRegister(obj.pgraphStartTimeSeconds) + return nil } @@ -88,3 +97,13 @@ func (obj *Prometheus) UpdateCheckApplyTotal(kind string, apply, eventful, error metric.Inc() return nil } + +// UpdatePgraphStartTime updates the mgmt_graph_start_time_seconds metric +// to the current timestamp. 
+func (obj *Prometheus) UpdatePgraphStartTime() error { + if obj == nil { + return nil // happens when mgmt is launched without --prometheus + } + obj.pgraphStartTimeSeconds.SetToCurrentTime() + return nil +} diff --git a/test/shell/prometheus-3.sh b/test/shell/prometheus-3.sh index 7cf2119e..8d510745 100755 --- a/test/shell/prometheus-3.sh +++ b/test/shell/prometheus-3.sh @@ -14,6 +14,9 @@ curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="true",errorful # One CheckApply for a File ; in noop mode. curl 127.0.0.1:9233/metrics | grep '^mgmt_checkapply_total{apply="false",errorful="false",eventful="true",kind="File"} 1$' +# Check mgmt_graph_start_time_seconds +curl 127.0.0.1:9233/metrics | grep "^mgmt_graph_start_time_seconds [1-9]\+" + killall -SIGINT mgmt # send ^C to exit mgmt wait $pid # get exit status exit $?