diff --git a/README.md b/README.md index fa0871e..21aca9e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Prometheus exporter for exposing ipa-healthcheck metrics. It's essentially a wra ## Prerequisites - * Freeipa 4.8.0 at least, since this exporter uses the tool ["freeipa-healthcheck"](https://github.com/freeipa/freeipa-healthcheck). + * The tool ["freeipa-healthcheck"](https://github.com/freeipa/freeipa-healthcheck). ## Running @@ -52,10 +52,23 @@ Usage of ./ipa-healthcheck_exporter: ## Exported Metrics -| Metric Name | Description | -| --------------------------------------------------- | ------------------------------------------------------------------------------- | -| `ipa_healthcheck_state` | State of a IPA healthcheck (1: active, 0: inactive)" | - +``` +# HELP ipa_cert_expiration Expiration date of the certificates in warning state (unix timestamp) +# TYPE ipa_cert_expiration gauge +ipa_cert_expiration{certificate_request_id="20200626075943"} 1.604761504e+09 +... +# HELP ipa_dogtag_connectivity_check Check to verify dogtag basic connectivity. (1: success, 0: error) +# TYPE ipa_dogtag_connectivity_check gauge +ipa_dogtag_connectivity_check{ipahealthcheck="DogtagCertsConnectivityCheck"} 1 +# HELP ipa_replication_check Replication checks (1: success, 0: error) +# TYPE ipa_replication_check gauge +ipa_replication_check{ipahealthcheck="ReplicationConflictCheck"} 1 +# HELP ipa_service_state State of the services monitored by IPA healthcheck (1: running, 0: not running) +# TYPE ipa_service_state gauge +ipa_service_state{service="certmonger"} 1 +ipa_service_state{service="httpd"} 1 +... +``` ## Prometheus @@ -98,3 +111,6 @@ When a check is in error you can rerun it on the server to have more information ``` We currently have to use the --output-file option of the ipa-healthcheck command and a temp file to parse the checks otherwise some warnings are written on stdout alongside the json output. 
+ +TODO: + * Implement our own direct scraping mechanism (via ipalib) so that the exporter is not tied to ipa-healthcheck and performs better. diff --git a/ipahealthcheck_exporter.go b/ipahealthcheck_exporter.go index ae2322e..0dc79a6 100644 --- a/ipahealthcheck_exporter.go +++ b/ipahealthcheck_exporter.go @@ -9,8 +9,8 @@ import ( "os" "os/exec" "os/signal" - "strings" "syscall" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -22,19 +22,56 @@ var ( ipahealthcheckPath string port int - ipahealthcheckStateDesc = prometheus.NewDesc( - "ipa_healthcheck_state", - "State of a IPA healthcheck (1: active, 0: inactive)", - []string{"severity", "source", "check"}, nil, + ipahealthcheckServiceStateDesc = prometheus.NewDesc( + "ipa_service_state", + "State of the services monitored by IPA healthcheck (1: running, 0: not running)", + []string{"service"}, nil, ) + + ipahealthcheckDogtagCheckDesc = prometheus.NewDesc( + "ipa_dogtag_connectivity_check", + "Check to verify dogtag basic connectivity. 
(1: success, 0: error)", + []string{"ipahealthcheck"}, nil, + ) + + ipahealthcheckReplicationCheckDesc = prometheus.NewDesc( + "ipa_replication_check", + "Replication checks (1: success, 0: error)", + []string{"ipahealthcheck"}, nil, + ) + + ipahealthcheckCertExpirationDesc = prometheus.NewDesc( + "ipa_cert_expiration", + "Expiration date of the certificates in warning state (unix timestamp)", + []string{"certificate_request_id"}, nil, + ) + + scrapedChecks = map[string]scrapedCheck{ + "ipahealthcheck.meta.services": { + scrape: true, + metricsDesc: ipahealthcheckServiceStateDesc, + }, + "ipahealthcheck.ds.replication": { + scrape: true, + metricsDesc: ipahealthcheckReplicationCheckDesc, + }, + "DogtagCertsConnectivityCheck": { + scrape: true, + metricsDesc: ipahealthcheckDogtagCheckDesc, + }, + } ) type ipaCheck struct { - Source string - Check string - Result string - When string - Duration string + Source string + Check string + Result string + Kw map[string]interface{} +} + +type scrapedCheck struct { + scrape bool + metricsDesc *prometheus.Desc } type ipahealthcheckCollector struct { @@ -48,14 +85,16 @@ func init() { } func (ic ipahealthcheckCollector) Describe(ch chan<- *prometheus.Desc) { - ch <- ipahealthcheckStateDesc + ch <- ipahealthcheckServiceStateDesc + ch <- ipahealthcheckDogtagCheckDesc + ch <- ipahealthcheckReplicationCheckDesc + ch <- ipahealthcheckCertExpirationDesc } func (ic ipahealthcheckCollector) Collect(ch chan<- prometheus.Metric) { log.Infof("Scraping metrics from %v", ic.ipahealthcheckPath) var checks []ipaCheck - severityLevels := []string{"SUCCESS", "CRITICAL", "ERROR", "WARNING"} tmpFile, err := ioutil.TempFile("/dev/shm", "ipa-healthcheck.out") if err != nil { log.Fatal("Cannot write ipa-healthcheck output for parsing: ", err) @@ -79,16 +118,41 @@ func (ic ipahealthcheckCollector) Collect(ch chan<- prometheus.Metric) { for _, check := range checks { - for _, level := range severityLevels { + if scrapedChecks[check.Source].scrape { + 
+ if check.Result == "SUCCESS" { + ch <- prometheus.MustNewConstMetric(scrapedChecks[check.Source].metricsDesc, prometheus.GaugeValue, 1.0, check.Check) + } else { + ch <- prometheus.MustNewConstMetric(scrapedChecks[check.Source].metricsDesc, prometheus.GaugeValue, 0.0, check.Check) + } + } - if level == check.Result { - ch <- prometheus.MustNewConstMetric(ipahealthcheckStateDesc, prometheus.GaugeValue, 1.0, strings.ToLower(level), check.Source, check.Check) + if scrapedChecks[check.Check].scrape { + + if check.Result == "SUCCESS" { + ch <- prometheus.MustNewConstMetric(scrapedChecks[check.Check].metricsDesc, prometheus.GaugeValue, 1.0, check.Check) } else { - ch <- prometheus.MustNewConstMetric(ipahealthcheckStateDesc, prometheus.GaugeValue, 0.0, strings.ToLower(level), check.Source, check.Check) + ch <- prometheus.MustNewConstMetric(scrapedChecks[check.Check].metricsDesc, prometheus.GaugeValue, 0.0, check.Check) + } + } + if check.Source == "ipahealthcheck.ipa.certs" && check.Check == "IPACertmongerExpirationCheck" { + + if check.Result == "WARNING" { + + timestamp, err := time.Parse("20060102150405Z", check.Kw["expiration_date"].(string)) + + if err != nil { + log.Infof("A problem occured while getting the certificate expiration (request id : %v) : %v", check.Kw["key"].(string), err) + } else { + ch <- prometheus.MustNewConstMetric(ipahealthcheckCertExpirationDesc, prometheus.GaugeValue, float64(timestamp.Unix()), check.Kw["key"].(string)) + } } } + } + + defer os.Remove(tmpFile.Name()) } func main() {