Skip to content

Commit

Permalink
Only export needed metrics
Browse files Browse the repository at this point in the history
Since we cannot use the check uuid, because it changes at each scrape
and so cannot be used to differentiate the checks, we export only some checks.
  • Loading branch information
lconsuegra committed Oct 12, 2020
1 parent 03d5ff9 commit 542e314
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 21 deletions.
26 changes: 21 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Prometheus exporter for exposing ipa-healthcheck metrics. It's essentially a wra

## Prerequisites

* Freeipa 4.8.0 at least, since this exporter uses the tool ["freeipa-healthcheck"](https://github.com/freeipa/freeipa-healthcheck).
* The tool ["freeipa-healthcheck"](https://github.com/freeipa/freeipa-healthcheck).

## Running

Expand Down Expand Up @@ -52,10 +52,23 @@ Usage of ./ipa-healthcheck_exporter:

## Exported Metrics

| Metric Name | Description |
| --------------------------------------------------- | ------------------------------------------------------------------------------- |
| `ipa_healthcheck_state` | State of an IPA healthcheck (1: active, 0: inactive) |

```
# HELP ipa_cert_expiration Expiration date of the certificates in warning state (unix timestamp)
# TYPE ipa_cert_expiration gauge
ipa_cert_expiration{certificate_request_id="20200626075943"} 1.604761504e+09
...
# HELP ipa_dogtag_connectivity_check Check to verify dogtag basic connectivity. (1: success, 0: error)
# TYPE ipa_dogtag_connectivity_check gauge
ipa_dogtag_connectivity_check{ipahealthcheck="DogtagCertsConnectivityCheck"} 1
# HELP ipa_replication_check Replication checks (1: success, 0: error)
# TYPE ipa_replication_check gauge
ipa_replication_check{ipahealthcheck="ReplicationConflictCheck"} 1
# HELP ipa_service_state State of the services monitored by IPA healthcheck (1: running, 0: not running)
# TYPE ipa_service_state gauge
ipa_service_state{service="certmonger"} 1
ipa_service_state{service="httpd"} 1
...
```

## Prometheus

Expand Down Expand Up @@ -98,3 +111,6 @@ When a check is in error you can rerun it on the server to have more information
```

We currently have to use the --output-file option of the ipa-healthcheck command and a temp file to parse the checks; otherwise, some warnings are written on stdout alongside the JSON output.

TODO:
* Implement our own direct scraping mechanism (via ipalib) so that we are not tied to ipa-healthcheck and get better performance.
96 changes: 80 additions & 16 deletions ipahealthcheck_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
"os"
"os/exec"
"os/signal"
"strings"
"syscall"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
Expand All @@ -22,19 +22,56 @@ var (
ipahealthcheckPath string
port int

ipahealthcheckStateDesc = prometheus.NewDesc(
"ipa_healthcheck_state",
"State of a IPA healthcheck (1: active, 0: inactive)",
[]string{"severity", "source", "check"}, nil,
ipahealthcheckServiceStateDesc = prometheus.NewDesc(
"ipa_service_state",
"State of the services monitored by IPA healthcheck (1: running, 0: not running)",
[]string{"service"}, nil,
)

ipahealthcheckDogtagCheckDesc = prometheus.NewDesc(
"ipa_dogtag_connectivity_check",
"Check to verify dogtag basic connectivity. (1: success, 0: error)",
[]string{"ipahealthcheck"}, nil,
)

ipahealthcheckReplicationCheckDesc = prometheus.NewDesc(
"ipa_replication_check",
"Replication checks (1: success, 0: error)",
[]string{"ipahealthcheck"}, nil,
)

ipahealthcheckCertExpirationDesc = prometheus.NewDesc(
"ipa_cert_expiration",
"Expiration date of the certificates in warning state (unix timestamp)",
[]string{"certificate_request_id"}, nil,
)

scrapedChecks = map[string]scrapedCheck{
"ipahealthcheck.meta.services": {
scrape: true,
metricsDesc: ipahealthcheckServiceStateDesc,
},
"ipahealthcheck.ds.replication": {
scrape: true,
metricsDesc: ipahealthcheckReplicationCheckDesc,
},
"DogtagCertsConnectivityCheck": {
scrape: true,
metricsDesc: ipahealthcheckDogtagCheckDesc,
},
}
)

type ipaCheck struct {
Source string
Check string
Result string
When string
Duration string
Source string
Check string
Result string
Kw map[string]interface{}
}

// scrapedCheck is the value type of the scrapedChecks lookup table, which is
// keyed by an ipa-healthcheck source or check name. Looking up a name that is
// not in the table yields the zero value, whose scrape field is false.
type scrapedCheck struct {
// scrape gates export: Collect emits a metric for a check only when this is true.
scrape bool
// metricsDesc is the descriptor Collect passes to prometheus.MustNewConstMetric.
metricsDesc *prometheus.Desc
}

type ipahealthcheckCollector struct {
Expand All @@ -48,14 +85,16 @@ func init() {
}

func (ic ipahealthcheckCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- ipahealthcheckStateDesc
ch <- ipahealthcheckServiceStateDesc
ch <- ipahealthcheckDogtagCheckDesc
ch <- ipahealthcheckReplicationCheckDesc
ch <- ipahealthcheckCertExpirationDesc
}

func (ic ipahealthcheckCollector) Collect(ch chan<- prometheus.Metric) {
log.Infof("Scraping metrics from %v", ic.ipahealthcheckPath)

var checks []ipaCheck
severityLevels := []string{"SUCCESS", "CRITICAL", "ERROR", "WARNING"}
tmpFile, err := ioutil.TempFile("/dev/shm", "ipa-healthcheck.out")
if err != nil {
log.Fatal("Cannot write ipa-healthcheck output for parsing: ", err)
Expand All @@ -79,16 +118,41 @@ func (ic ipahealthcheckCollector) Collect(ch chan<- prometheus.Metric) {

for _, check := range checks {

for _, level := range severityLevels {
if scrapedChecks[check.Source].scrape {

if check.Result == "SUCCESS" {
ch <- prometheus.MustNewConstMetric(scrapedChecks[check.Source].metricsDesc, prometheus.GaugeValue, 1.0, check.Check)
} else {
ch <- prometheus.MustNewConstMetric(scrapedChecks[check.Source].metricsDesc, prometheus.GaugeValue, 0.0, check.Check)
}
}

if level == check.Result {
ch <- prometheus.MustNewConstMetric(ipahealthcheckStateDesc, prometheus.GaugeValue, 1.0, strings.ToLower(level), check.Source, check.Check)
if scrapedChecks[check.Check].scrape {

if check.Result == "SUCCESS" {
ch <- prometheus.MustNewConstMetric(scrapedChecks[check.Check].metricsDesc, prometheus.GaugeValue, 1.0, check.Check)
} else {
ch <- prometheus.MustNewConstMetric(ipahealthcheckStateDesc, prometheus.GaugeValue, 0.0, strings.ToLower(level), check.Source, check.Check)
ch <- prometheus.MustNewConstMetric(scrapedChecks[check.Check].metricsDesc, prometheus.GaugeValue, 0.0, check.Check)
}
}

if check.Source == "ipahealthcheck.ipa.certs" && check.Check == "IPACertmongerExpirationCheck" {

if check.Result == "WARNING" {

timestamp, err := time.Parse("20060102150405Z", check.Kw["expiration_date"].(string))

if err != nil {
log.Infof("A problem occured while getting the certificate expiration (request id : %v) : %v", check.Kw["key"].(string), err)
} else {
ch <- prometheus.MustNewConstMetric(ipahealthcheckCertExpirationDesc, prometheus.GaugeValue, float64(timestamp.Unix()), check.Kw["key"].(string))
}
}
}

}

defer os.Remove(tmpFile.Name())
}

func main() {
Expand Down

0 comments on commit 542e314

Please sign in to comment.