Add metrics to Python implementation

dominickp · Feb 13, 2024 · 9a4af63 · 9a4af63
1 parent b0146dd
commit 9a4af63
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 2 deletions.
diff --git a/python/requirements.txt b/python/requirements.txt
@@ -2,3 +2,4 @@ Flask==3.0.2
 requests==2.31.0 
 furl==2.1.3
 Flask-Cors==4.0.0
+prometheus-client==0.19.0 
diff --git a/python/src/chan_client.py b/python/src/chan_client.py
@@ -1,7 +1,9 @@
 import os
 import requests
 import logging
+from time import time
 from furl import furl
+from metrics import METRIC_FANOUT_REQUEST_TIME
 
 REQUEST_TIMEOUT = 5
 DEFAULT_CHAN_HOST = "https://a.4cdn.org"
@@ -12,6 +14,19 @@
 class ChanClient:
     def __init__(self, host=CHAN_HOST):
         self.host = host
+
+    def handle_request(self, method, url, headers=None):
+        """
+        Make a request and record the time spent on the request.
+
+        The elapsed wall-clock time is observed on METRIC_FANOUT_REQUEST_TIME,
+        labelled with the method, the URL and the response status code. If the
+        request raises (e.g. on timeout) the exception propagates and no
+        observation is recorded.
+
+        :param method: The HTTP method to use (e.g. "GET").
+        :param url: The URL to request.
+        :param headers: Optional headers to send with the request.
+        :return: The response object returned by ``requests.request``.
+        """
+        # ``None`` replaces the former ``{}`` default so that no dict instance
+        # is shared between calls; ``requests`` treats ``None`` as "no extra headers".
+        start = time()
+        chan_r = requests.request(method, url, timeout=REQUEST_TIMEOUT, headers=headers)
+        total_time = time() - start
+        # NOTE(review): the full URL is used as the "endpoint" label value, so every
+        # distinct URL creates its own time series - confirm this cardinality is acceptable.
+        METRIC_FANOUT_REQUEST_TIME.labels(method, url, chan_r.status_code).observe(total_time)
+        return chan_r
 
     def get_catalog(self, board, headers={}):
         """
@@ -27,7 +42,7 @@ def get_catalog(self, board, headers={}):
         # Example: https://a.4cdn.org/po/catalog.json
         url = furl(CHAN_HOST).add(path=[board, "catalog.json"]).url
 
-        chan_r = requests.get(url, timeout=REQUEST_TIMEOUT, headers=headers)
+        chan_r = self.handle_request("GET", url, headers)
         if not chan_r:
             logging.error(f"Failed to get catalog for board {board}.")
             raise Exception(f"Failed to get catalog for board {board}.")

diff --git a/python/src/main.py b/python/src/main.py
@@ -1,17 +1,29 @@
-from flask import Flask, request, Response
+from flask import Flask, request, Response, g
+from werkzeug.middleware.dispatcher import DispatcherMiddleware
+from prometheus_client import make_wsgi_app
+from time import time
 from flask_cors import CORS
 from chan_client import ChanClient
 from cli import get_cli_from_chan_catalog
+from metrics import METRIC_TOTAL_REQUEST_TIME
 
 app = Flask(__name__)
 CORS(app)
 
+# Add prometheus wsgi middleware to route /metrics requests
+app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {
+    "/metrics": make_wsgi_app()
+})
 
 @app.before_request
 def before_request():
     """
     Check if the request is from curl. If not, return an HTML message to the user.
     """
+
+    # Store the request start time on Flask's request-scoped `g` object
+    g.start = time()
+
     # Allow CORS preflight requests
     if request.method.lower() == "options":
         return "", 204
@@ -23,6 +35,19 @@ def before_request():
         </div>""", 400
     pass
 
+@app.after_request
+def after_request(response):
+    """
+    Observe the duration of the finished request on METRIC_TOTAL_REQUEST_TIME.
+
+    :param response: The response produced for the current request.
+    :return: The same response object, unchanged.
+    """
+    # Elapsed time since before_request stored the start time on `g`
+    elapsed = time() - g.start
+    # Label by the route template (e.g. "/<string:name>/<int:page>") rather than the
+    # concrete path; requests that matched no route are labelled "unmatched-route"
+    if request.url_rule:
+        endpoint_label = request.url_rule.rule
+    else:
+        endpoint_label = "unmatched-route"
+    histogram = METRIC_TOTAL_REQUEST_TIME.labels(request.method, endpoint_label, response.status_code)
+    histogram.observe(elapsed)
+    return response
+
 @app.route("/")
 def index():
     response = Response("You should call /<board>/<page> to get the catalog of a board.\n")

diff --git a/python/src/metrics.py b/python/src/metrics.py
@@ -0,0 +1,22 @@
+from prometheus_client import start_http_server, Histogram
+import time
+
+# Histogram bucket upper bounds, written in milliseconds for readability.
+ms_buckets = [
+    25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 475, 500, 600,
+    700, 800, 900, 1000, 2000, 3000, 4000, 5000, "+Inf"
+]
+# Both histograms are named "*_seconds" and are fed `time() - start` deltas, which
+# are in seconds. Convert the millisecond bounds to seconds so observations spread
+# across the buckets instead of all falling under the first bound.
+# float("+Inf") / 1000 is still infinity, so the catch-all bucket is preserved.
+second_buckets = [float(bound) / 1000 for bound in ms_buckets]
+
+# Time spent serving each incoming request, labelled by method, route and status code.
+METRIC_TOTAL_REQUEST_TIME = Histogram(
+    "total_request_seconds",
+    "Time spent making requests to the service",
+    ["method", "endpoint", "response_code"],
+    buckets=second_buckets
+)
+# Time spent on each outgoing (fanout) request, labelled by method, URL and status code.
+METRIC_FANOUT_REQUEST_TIME = Histogram(
+    "fanout_request_seconds",
+    "Time spent processing fanout requests",
+    ["method", "endpoint", "response_code"],
+    buckets=second_buckets
+)
+
+