From 1af67e72d439e0b6b5d446874b585c41c6375a3d Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Wed, 8 Feb 2017 08:26:52 +0100 Subject: [PATCH] prometheus: Implement basic Prometheus support Signed-off-by: Julien Pivotto --- README.md | 1 + docs/prometheus.md | 79 ++++++++++++++++++++++++++++++++++++++++ lib/cli.go | 12 ++++++ lib/main.go | 27 ++++++++++++++ prometheus/prometheus.go | 59 ++++++++++++++++++++++++++++++ test/shell/t9.sh | 16 ++++++++ 6 files changed, 194 insertions(+) create mode 100644 docs/prometheus.md create mode 100644 prometheus/prometheus.go create mode 100755 test/shell/t9.sh diff --git a/README.md b/README.md index f997e553..80a19556 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Please read, enjoy and help improve our documentation! | [quick start guide](docs/quick-start-guide.md) | for mgmt developers | | [resource guide](docs/resource-guide.md) | for mgmt developers | | [godoc API reference](https://godoc.org/github.com/purpleidea/mgmt) | for mgmt developers | +| [prometheus guide](docs/prometheus.md) | for everyone | | [puppet guide](docs/puppet-guide.md) | for puppet sysadmins | ## Questions: diff --git a/docs/prometheus.md b/docs/prometheus.md new file mode 100644 index 00000000..8d757de4 --- /dev/null +++ b/docs/prometheus.md @@ -0,0 +1,79 @@ +# mgmt + + + +## Prometheus support + +Mgmt comes with built-in prometheus support. It is disabled by default, and +can be enabled with the `--prometheus` command line switch. + +By default, the prometheus instance will listen on [`127.0.0.1:9233`][pd]. You can +change this setting by using the `--prometheus-listen` cli option. + +To have the mgmt prometheus instance listen on all interfaces (0.0.0.0:45001), use: +`./mgmt r --prometheus --prometheus-listen :45001` + +## Metrics + +Mgmt exposes three kinds of metrics: _go_ metrics, _etcd_ metrics and _mgmt_ metrics. + +### go metrics + +We use the [prometheus go_collector][pgc] to expose go metrics. Those metrics +are mainly useful for debugging and perf testing. 
+ +### etcd metrics + +mgmt exposes etcd metrics. Read more in the [upstream documentation][etcdm]. + +### mgmt metrics + +Here is a list of the metrics we provide: + +- `mgmt_resources_total`: The number of resources that mgmt is managing. +- `mgmt_checkapply_total`: The number of checkapplies that mgmt has run. +- `mgmt_failures_total`: The total number of resource failures. +- `mgmt_failures_current`: The number of resources that are currently failed. + +For each metric, you will get some extra labels: + +- `resource_type`: The type of mgmt resource + +## Alerting + +You can use prometheus to alert you upon changes or failures. We do not provide +such templates yet, but we plan to provide some examples in this repository. +Patches welcome! + +## Grafana + +We do not have grafana dashboards yet. Patches welcome! + +## External resources + +- [prometheus website](https://prometheus.io/) +- [prometheus documentation](https://prometheus.io/docs/introduction/overview/) +- [prometheus best practices regarding metrics + naming](https://prometheus.io/docs/practices/naming/) +- [grafana website](http://grafana.org/) + +[pgc]: https://github.com/prometheus/client_golang/blob/master/prometheus/go_collector.go +[etcdm]: https://coreos.com/etcd/docs/latest/metrics.html +[pd]: https://github.com/prometheus/prometheus/wiki/Default-port-allocation diff --git a/lib/cli.go b/lib/cli.go index 2835bcf7..fce8b30a 100644 --- a/lib/cli.go +++ b/lib/cli.go @@ -115,6 +115,9 @@ func run(c *cli.Context) error { return err } + obj.Prometheus = c.Bool("prometheus") + obj.PrometheusListen = c.String("prometheus-listen") + // install the exit signal handler exit := make(chan struct{}) defer close(exit) @@ -320,6 +323,15 @@ func CLI(program, version string, flags Flags) error { Value: "", Usage: "default identity used for generation", }, + cli.BoolFlag{ + Name: "prometheus", + Usage: "start a prometheus instance", + }, + cli.StringFlag{ + Name: "prometheus-listen", + Value: "", + Usage: "specify 
prometheus instance binding", + }, }, }, } diff --git a/lib/main.go b/lib/main.go index fa69d237..12ba57c9 100644 --- a/lib/main.go +++ b/lib/main.go @@ -30,6 +30,7 @@ import ( "github.com/purpleidea/mgmt/gapi" "github.com/purpleidea/mgmt/pgp" "github.com/purpleidea/mgmt/pgraph" + "github.com/purpleidea/mgmt/prometheus" "github.com/purpleidea/mgmt/recwatch" "github.com/purpleidea/mgmt/remote" "github.com/purpleidea/mgmt/resources" @@ -93,6 +94,9 @@ type Main struct { PgpIdentity *string pgpKeys *pgp.PGP // agent key pair + Prometheus bool // enable prometheus metrics + PrometheusListen string // prometheus instance bind specification + exit chan error // exit signal } @@ -223,6 +227,21 @@ func (obj *Main) Run() error { return errwrap.Wrapf(err, "Can't create pgraph prefix") } + var prom *prometheus.Prometheus + if obj.Prometheus { + prom = &prometheus.Prometheus{ + Listen: obj.PrometheusListen, + } + if err := prom.Init(); err != nil { + return errwrap.Wrapf(err, "Can't create initiate Prometheus instance") + } + + log.Printf("Main: Prometheus: Starting instance on %s", prom.Listen) + if err := prom.Start(); err != nil { + return errwrap.Wrapf(err, "Can't start initiate Prometheus instance") + } + } + if !obj.NoPgp { pgpPrefix := fmt.Sprintf("%s/", path.Join(prefix, "pgp")) if err := os.MkdirAll(pgpPrefix, 0770); err != nil { @@ -552,6 +571,14 @@ func (obj *Main) Run() error { reterr = multierr.Append(reterr, err) // list of errors } + if obj.Prometheus { + log.Printf("Main: Prometheus: Stopping instance") + if err := prom.Stop(); err != nil { + err = errwrap.Wrapf(err, "Prometheus instance exited poorly!") + reterr = multierr.Append(reterr, err) + } + } + if obj.Flags.Debug { log.Printf("Main: Graph: %v", G) } diff --git a/prometheus/prometheus.go b/prometheus/prometheus.go new file mode 100644 index 00000000..3b42c88c --- /dev/null +++ b/prometheus/prometheus.go @@ -0,0 +1,59 @@ +// Mgmt +// Copyright (C) 2013-2016+ James Shubin and the project contributors +// 
Written by James Shubin and the project contributors
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+// Package prometheus provides functions that are useful to control and manage
+// the built-in prometheus instance.
+package prometheus
+
+import (
+	"errors"
+	"net"
+	"net/http"
+
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+// DefaultPrometheusListen is the address that Init falls back to when no
+// Listen value is given. The port is registered in
+// https://github.com/prometheus/prometheus/wiki/Default-port-allocation
+const DefaultPrometheusListen = "127.0.0.1:9233"
+
+// Prometheus is the struct that contains information about the
+// prometheus instance. Run Init() on it. Start and Stop are not guarded by a
+// lock, so they must not be called concurrently.
+type Prometheus struct {
+	Listen string // the listen specification for the net/http server
+
+	listener net.Listener // bound socket; nil while the server is not running
+}
+
+// Init some parameters - currently the Listen address, which is set to
+// DefaultPrometheusListen when it is empty. It always returns nil.
+func (obj *Prometheus) Init() error {
+	if len(obj.Listen) == 0 {
+		obj.Listen = DefaultPrometheusListen
+	}
+	return nil
+}
+
+// Start binds the Listen address and runs a http server in a go routine, that
+// responds to /metrics as prometheus would expect. The socket is bound before
+// this function returns, so an invalid or already used address is returned as
+// an error instead of being lost inside the go routine. An error is also
+// returned if the instance has already been started.
+func (obj *Prometheus) Start() error {
+	if obj.listener != nil {
+		return errors.New("prometheus instance is already started")
+	}
+
+	addr := obj.Listen
+	if addr == "" {
+		addr = ":http" // same fallback that http.ListenAndServe applies
+	}
+	listener, err := net.Listen("tcp", addr)
+	if err != nil {
+		return err
+	}
+	obj.listener = listener
+
+	// Use a private mux: registering the same pattern twice on the default
+	// mux panics, and the default mux may carry unrelated handlers.
+	mux := http.NewServeMux()
+	mux.Handle("/metrics", promhttp.Handler())
+
+	// Serve returns with an error once Stop closes the listener; that is
+	// the expected way for this go routine to end.
+	go http.Serve(listener, mux)
+	return nil
+}
+
+// Stop the http server by closing its listener, so that no new connections
+// are accepted and the address is released. It does nothing and returns nil
+// if the server is not running. This works without the http.Server shutdown
+// support that was added in go 1.8.
+func (obj *Prometheus) Stop() error {
+	if obj.listener == nil {
+		return nil
+	}
+	err := obj.listener.Close()
+	obj.listener = nil
+	return err
+}
diff --git a/test/shell/t9.sh b/test/shell/t9.sh
new file mode 100755
index 00000000..b43d138e
--- /dev/null
+++ b/test/shell/t9.sh
@@ -0,0 +1,16 @@
+#!/bin/bash -e
+
+# run empty graph, with prometheus support
+timeout --kill-after=20s 15s ./mgmt run --tmp-prefix --prometheus &
+pid=$!
+sleep 5s # let it converge
+
+# Check that etcd metrics are loaded
+curl 127.0.0.1:9233/metrics | grep "^etcd_server_has_leader 1"
+
+# Check that go metrics are loaded
+curl 127.0.0.1:9233/metrics | grep "^go_goroutines [0-9]\+"
+
+killall -SIGINT mgmt # send ^C to exit mgmt
+wait $pid # get exit status
+exit $?