Dora Exporter and Dockerizing Application
DORA Exporter is a tool designed to collect and export DORA (DevOps Research and Assessment) metrics from CI/CD pipelines. These metrics are crucial for measuring and improving software delivery performance.
The DORA metrics include:
- Deployment Frequency
- Lead Time for Changes
- Time to Restore Service
- Change Failure Rate
This exporter integrates with CI/CD pipelines to automatically collect and report these metrics, providing valuable insights into the development process.
Prerequisites:
- Python installed
- Prometheus installed
Few ready-made DORA exporters are currently available, so in this guide we will build our own using Python.
- Create a directory called dora-exporter and cd into it:
mkdir -p dora-exporter
cd dora-exporter
- Create a main.py file that will contain the DORA exporter Python script and paste the below contents into the file:
import os
import time
import logging
from datetime import datetime
from statistics import mean
from typing import Dict, List

import requests
from dotenv import load_dotenv
from prometheus_client import start_http_server, Gauge

# Load GitHub settings from the .env file created alongside this script
load_dotenv()

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# One Prometheus Gauge per DORA metric, labelled by branch and repository
DORA_METRICS = {
    "deployment_frequency": Gauge("deployment_frequency", "Deployment Frequency", ["branch", "repo", "status"]),
    "lead_time_for_changes": Gauge("lead_time_for_changes", "Lead Time for Changes", ["branch", "repo"]),
    "change_failure_rate": Gauge("change_failure_rate", "Change Failure Rate", ["branch", "repo"]),
    "mean_time_to_recovery": Gauge("mean_time_to_recovery", "Mean Time to Recovery", ["branch", "repo"]),
}

ORGANIZATION = os.getenv('GITHUB_ORGANIZATION')
REPOSITORY = os.getenv('GITHUB_REPOSITORY')
AUTH_TOKEN = os.getenv('GITHUB_TOKEN')

TRACKED_BRANCHES = ["dev", "staging", "main"]
DEPLOY_STATUSES = ["success", "failure", "skipped", "startup_failure"]


def fetch_deployment_data() -> List[Dict]:
    """Fetch completed workflow runs from GitHub Actions, following pagination."""
    logger.info('Fetching deployment data from GitHub')
    url = f"https://api.github.com/repos/{ORGANIZATION}/{REPOSITORY}/actions/runs"
    headers = {
        "Authorization": f"Bearer {AUTH_TOKEN}",
        "Accept": "application/vnd.github+json",
    }
    params = {"status": "completed", "event": "push"}
    deployments = []
    while url:
        try:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            data = response.json()
            deployments.extend(data["workflow_runs"])
            # The "next" link already carries the query string, so drop the params
            url = response.links.get("next", {}).get("url")
            params = None
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching deployment data: {e}")
            break
    logger.info(f"Fetched {len(deployments)} deployments")
    return deployments


def fetch_commit_data() -> List[Dict]:
    """Fetch commit data from GitHub, following pagination."""
    logger.info('Fetching commit data from GitHub')
    url = f"https://api.github.com/repos/{ORGANIZATION}/{REPOSITORY}/commits"
    headers = {
        "Authorization": f"Bearer {AUTH_TOKEN}",
        "Accept": "application/vnd.github+json",
    }
    params = {"per_page": 100}
    commits = []
    while url:
        try:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            commits.extend(response.json())
            url = response.links.get("next", {}).get("url")
            params = None
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching commit data: {e}")
            break
    logger.info(f"Fetched {len(commits)} commits")
    return commits


def calculate_deployment_frequency(deployments: List[Dict]) -> None:
    """Count workflow runs per branch and conclusion, and update the Prometheus Gauge."""
    logger.info('Calculating deployment frequency')
    deployment_stats = {}
    for deployment in deployments:
        branch = deployment["head_branch"]
        status = deployment["conclusion"]
        if branch not in deployment_stats:
            deployment_stats[branch] = {s: 0 for s in DEPLOY_STATUSES}
        if status in DEPLOY_STATUSES:
            deployment_stats[branch][status] += 1
    for branch in TRACKED_BRANCHES:
        stats = deployment_stats.get(branch, {s: 0 for s in DEPLOY_STATUSES})
        for status in DEPLOY_STATUSES:
            DORA_METRICS["deployment_frequency"].labels(branch=branch, repo=REPOSITORY, status=status).set(stats[status])
        if branch in deployment_stats:
            logger.info(f'Updated deployment frequency metrics for branch {branch}')
        else:
            logger.info(f'No deployment data found for branch {branch}, setting metrics to 0')


def calculate_lead_time_for_changes(deployments: List[Dict], commits: List[Dict]) -> None:
    """Average the time from commit to successful deployment per branch, and update the Gauge."""
    logger.info('Calculating lead time for changes')
    branch_lead_times = {}
    for deployment in deployments:
        branch = deployment["head_branch"]
        branch_lead_times.setdefault(branch, [])
        if deployment["conclusion"] != "success":
            continue
        for commit in commits:
            if commit["sha"] == deployment["head_sha"]:
                commit_time = datetime.strptime(commit["commit"]["author"]["date"], "%Y-%m-%dT%H:%M:%SZ")
                deployment_time = datetime.strptime(deployment["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
                lead_time = (deployment_time - commit_time).total_seconds()
                branch_lead_times[branch].append(lead_time)
                logger.debug(f'Calculated lead time for branch {branch}: {lead_time} seconds')
                break
    for branch in TRACKED_BRANCHES:
        # Guard against branches with deployments but no matching successful runs,
        # which would otherwise pass an empty list to mean() and raise an error
        if branch_lead_times.get(branch):
            average_lead_time = mean(branch_lead_times[branch])
            DORA_METRICS["lead_time_for_changes"].labels(branch=branch, repo=REPOSITORY).set(average_lead_time)
            logger.info(f'Updated lead time for changes metric for branch {branch}, average: {average_lead_time} seconds')
        else:
            DORA_METRICS["lead_time_for_changes"].labels(branch=branch, repo=REPOSITORY).set(0)
            logger.info(f'No lead time data found for branch {branch}, setting metric to 0 seconds')


def calculate_change_failure_rate(deployments: List[Dict]) -> None:
    """Compute the fraction of failed deployments per branch, and update the Gauge."""
    logger.info('Calculating change failure rate')
    branch_failure_rates = {}
    for deployment in deployments:
        branch = deployment["head_branch"]
        branch_failure_rates.setdefault(branch, {"total": 0, "failed": 0})
        branch_failure_rates[branch]["total"] += 1
        if deployment["conclusion"] == "failure":
            branch_failure_rates[branch]["failed"] += 1
    for branch in TRACKED_BRANCHES:
        stats = branch_failure_rates.get(branch)
        if stats and stats["total"] > 0:
            change_failure_rate = stats["failed"] / stats["total"]
            DORA_METRICS["change_failure_rate"].labels(branch=branch, repo=REPOSITORY).set(change_failure_rate)
            logger.info(f'Updated change failure rate metric for branch {branch}: {change_failure_rate:.2f}')
        else:
            DORA_METRICS["change_failure_rate"].labels(branch=branch, repo=REPOSITORY).set(0)
            logger.info(f'No deployment data found for branch {branch}, setting change failure rate to 0')


def calculate_mean_time_to_recovery(deployments: List[Dict]) -> None:
    """Average the time between a failed run and the next successful run per branch."""
    logger.info('Calculating mean time to recovery')
    branch_recovery_times = {}
    for deployment in deployments:
        branch = deployment["head_branch"]
        branch_recovery_times.setdefault(branch, [])
        if deployment["conclusion"] == "success":
            # Find the most recent failure on this branch before this success
            previous_failures = [
                d for d in deployments
                if d["head_branch"] == branch
                and d["conclusion"] == "failure"
                and d["updated_at"] < deployment["updated_at"]
            ]
            if previous_failures:
                last_failure = max(previous_failures, key=lambda d: d["updated_at"])
                recovery_time = (
                    datetime.strptime(deployment["updated_at"], "%Y-%m-%dT%H:%M:%SZ") -
                    datetime.strptime(last_failure["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
                ).total_seconds()
                branch_recovery_times[branch].append(recovery_time)
                logger.debug(f'Calculated MTTR for branch {branch}: {recovery_time} seconds')
    for branch in TRACKED_BRANCHES:
        if branch_recovery_times.get(branch):
            average_mttr = mean(branch_recovery_times[branch])
            DORA_METRICS["mean_time_to_recovery"].labels(branch=branch, repo=REPOSITORY).set(average_mttr)
            logger.info(f'Updated mean time to recovery metric for branch {branch}: {average_mttr:.2f} seconds')
        else:
            DORA_METRICS["mean_time_to_recovery"].labels(branch=branch, repo=REPOSITORY).set(0)
            logger.info(f'No recovery time data found for branch {branch}, setting MTTR to 0 seconds')


def run_exporter() -> None:
    """Start the Prometheus HTTP server and refresh the DORA metrics in a loop."""
    start_http_server(8090)
    logger.info('Prometheus metrics server started on port 8090')
    while True:
        try:
            deployments = fetch_deployment_data()
            commits = fetch_commit_data()
            calculate_deployment_frequency(deployments)
            calculate_lead_time_for_changes(deployments, commits)
            calculate_change_failure_rate(deployments)
            calculate_mean_time_to_recovery(deployments)
            time.sleep(60)
        except Exception as e:
            # Back off for longer when an update cycle fails
            logger.error(f'Error updating DORA metrics: {e}')
            time.sleep(300)


if __name__ == "__main__":
    run_exporter()
What this does:
This Python script implements a DORA (DevOps Research and Assessment) metrics exporter for Prometheus. Here's a summary of its functionality:
- It fetches deployment and commit data from GitHub for a specified organization and repository.
- It calculates and exposes four key DORA metrics:
- Deployment Frequency
- Lead Time for Changes
- Change Failure Rate
- Mean Time to Recovery (MTTR)
- These metrics are calculated for three branches: dev, staging, and main.
- The script uses the Prometheus client library to create Gauge metrics for each DORA metric.
- It runs a continuous loop that:
- Fetches the latest data from GitHub
- Calculates the metrics
- Updates the Prometheus Gauges
- Waits for 60 seconds before the next iteration
- The Prometheus HTTP server is started on port 8090, making these metrics available for scraping.
- Extensive logging is implemented to track the script's operations and any errors that occur.
- Environment variables are used to configure the GitHub organization, repository, and authentication token.
- Error handling is implemented to manage potential issues with API requests or data processing.
- Create a .env file in the same folder and paste the below into it:
GITHUB_ORGANIZATION=<my-organization/account>
GITHUB_REPOSITORY=<my-project-repository>
GITHUB_TOKEN=<my-GitHub-account-personal-access-token>
Replace the placeholders with your own values. The token must be able to read the repository's commits and Actions workflow runs (for a classic personal access token, the repo scope covers this).
- Create a requirements.txt file and paste the below into it:
prometheus-client
python-dotenv
requests
- Set up a Python virtual environment (a minimal sketch follows this step) and install the requirements:
pip install -r requirements.txt
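If you are using Python's built-in venv module (an assumption; any virtual environment tool will do), creating and activating the environment before the install looks like this:
python3 -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt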
- To start the application, run the below command:
python main.py
The application will run on localhost:8090, where you will see metrics pulled from the GitHub repository you configured. The metrics are exposed at the /metrics endpoint (localhost:8090/metrics) so that Prometheus can scrape them.
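To confirm the exporter is serving data, you can query the endpoint from a terminal. The gauge names come from main.py; the label values and numbers below are only illustrative:
curl http://localhost:8090/metrics
# HELP deployment_frequency Deployment Frequency
# TYPE deployment_frequency gauge
deployment_frequency{branch="main",repo="my-repo",status="success"} 5.0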
You can choose to run the exporter as a systemd service so that it restarts automatically after a server failure or reboot.
- Create a dora-exporter.service file in the systemd directory:
sudo nano /etc/systemd/system/dora-exporter.service
- Paste the below contents into the file:
[Unit]
Description=Dora Exporter
After=network.target
[Service]
Type=simple
User=<user>
ExecStart=/usr/bin/python3 /home/<user>/dora-exporter/main.py
WorkingDirectory=/home/<user>/dora-exporter
Restart=always
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
Replace the user and paths with your username and the path to your DORA exporter folder. If you installed the dependencies into a virtual environment, point ExecStart at that environment's interpreter instead (e.g. /home/<user>/dora-exporter/.venv/bin/python).
- Save and exit the file
- Reload the daemon
sudo systemctl daemon-reload
- Start the exporter
sudo systemctl start dora-exporter
- Enable it to start on reboot
sudo systemctl enable dora-exporter
- Check the status to confirm it is running
sudo systemctl status dora-exporter
- You can check for logs if you need to troubleshoot
sudo journalctl -u dora-exporter
- Allow TCP connections on the assigned port
sudo ufw allow 8090/tcp
- In your browser, navigate to http://<server-ip>:8090/metrics; you should see the metrics being exposed
- Locate your prometheus.yml file; depending on your setup, it may be in /etc/prometheus/prometheus.yml.
- Add the below configuration to your yml file:
scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["91.229.239.213:9090"]
  - job_name: "dora-exporter"
    static_configs:
      - targets: ["91.229.239.213:8090"]
- Save and exit the file
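Optionally, if the promtool binary that ships with Prometheus is available, you can validate the configuration before restarting:
promtool check config /etc/prometheus/prometheus.yml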
- Restart Prometheus to apply the changes
sudo systemctl restart prometheus.service
- Navigate to the Prometheus UI. Under Status > Targets, you should see the Dora Exporter being monitored.
To visualize the metrics being collected from your pipeline, you can create a dashboard in Grafana.
- In your Grafana UI, under Connections, add Prometheus as a data source.
- In the dashboard tab, create a new dashboard for the DORA metrics.
- Run PromQL queries against the DORA metrics exposed via Prometheus (see the examples after this list).
- Save your dashboard
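As a starting point, here are some example PromQL queries against the gauges defined in main.py (dividing by 3600 and 60 to show lead time in hours and MTTR in minutes is just one presentation choice):
deployment_frequency{branch="main", status="success"}
lead_time_for_changes{branch="main"} / 3600
change_failure_rate{branch="main"}
mean_time_to_recovery{branch="main"} / 60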
Before you install Docker Engine for the first time on a new host machine, you need to set up the Docker repository. Afterward, you can install and update Docker from the repository.
Step one: Update your package index
sudo apt-get update
Step two: Add Docker's Official GPG key
sudo apt-get install ca-certificates curl
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc
Step three: Add the repository to Apt sources
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
Step four: Update your package index again
sudo apt-get update
Step five: Install the Docker packages
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
Step six: Verify that Docker Engine has installed successfully
sudo docker run hello-world
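With Docker installed, you can containerize the exporter itself. Below is a minimal Dockerfile sketch for the dora-exporter directory created earlier (the python:3.11-slim base image is an assumption; any recent Python image works). Place it next to main.py:

FROM python:3.11-slim
WORKDIR /app
# Install dependencies first so this layer is cached between builds
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY main.py .
EXPOSE 8090
CMD ["python", "main.py"]

Build and run it, passing the GitHub settings from your .env file and publishing the metrics port:

docker build -t dora-exporter .
docker run -d --env-file .env -p 8090:8090 dora-exporter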
Made by Dhee ‖ Sudobro ‖ Stephennwachukwu ‖ Dominic-source