diff --git a/ucp/benchmarks/backends/tornado.py b/ucp/benchmarks/backends/tornado.py
index 1a9e7b27e..3fdc140ee 100644
--- a/ucp/benchmarks/backends/tornado.py
+++ b/ucp/benchmarks/backends/tornado.py
@@ -86,6 +86,14 @@ async def run(self) -> bool:
             recv_msg = np.zeros(self.args.n_bytes, dtype="u1")
             assert recv_msg.nbytes == self.args.n_bytes
 
+        if self.args.report_gil_contention:
+            from gilknocker import KnockKnock
+
+            # Use the smallest possible polling interval; otherwise contention will
+            # always be zero for small messages and inconsistent for large messages.
+            knocker = KnockKnock(polling_interval_micros=1)
+            knocker.start()
+
         times = []
         for i in range(self.args.n_iter + self.args.n_warmup_iter):
             start = monotonic()
@@ -99,4 +107,10 @@ async def run(self) -> bool:
             stop = monotonic()
             if i >= self.args.n_warmup_iter:
                 times.append(stop - start)
+
+        if self.args.report_gil_contention:
+            knocker.stop()
+
         self.queue.put(times)
+        if self.args.report_gil_contention:
+            self.queue.put(knocker.contention_metric)
diff --git a/ucp/benchmarks/backends/ucp_async.py b/ucp/benchmarks/backends/ucp_async.py
index 62ac14306..e28d8f17d 100644
--- a/ucp/benchmarks/backends/ucp_async.py
+++ b/ucp/benchmarks/backends/ucp_async.py
@@ -128,6 +128,15 @@ async def run(self):
 
         if self.args.cuda_profile:
             xp.cuda.profiler.start()
+
+        if self.args.report_gil_contention:
+            from gilknocker import KnockKnock
+
+            # Use the smallest possible polling interval; otherwise contention will
+            # always be zero for small messages and inconsistent for large messages.
+            knocker = KnockKnock(polling_interval_micros=1)
+            knocker.start()
+
         times = []
         for i in range(self.args.n_iter + self.args.n_warmup_iter):
             start = monotonic()
@@ -143,9 +152,15 @@ async def run(self):
             stop = monotonic()
             if i >= self.args.n_warmup_iter:
                 times.append(stop - start)
+
+        if self.args.report_gil_contention:
+            knocker.stop()
         if self.args.cuda_profile:
             xp.cuda.profiler.stop()
+
         self.queue.put(times)
+        if self.args.report_gil_contention:
+            self.queue.put(knocker.contention_metric)
 
     def print_backend_specific_config(self):
         print_key_value(
diff --git a/ucp/benchmarks/backends/ucp_core.py b/ucp/benchmarks/backends/ucp_core.py
index 38799489c..30f0393b4 100644
--- a/ucp/benchmarks/backends/ucp_core.py
+++ b/ucp/benchmarks/backends/ucp_core.py
@@ -263,6 +263,13 @@ def op_completed():
 
         if self.args.cuda_profile:
             xp.cuda.profiler.start()
+        if self.args.report_gil_contention:
+            from gilknocker import KnockKnock
+
+            # Use the smallest possible polling interval; otherwise contention will
+            # always be zero for small messages and inconsistent for large messages.
+            knocker = KnockKnock(polling_interval_micros=1)
+            knocker.start()
 
         times = []
         last_iter = self.args.n_iter + self.args.n_warmup_iter - 1
@@ -292,10 +299,14 @@ def op_completed():
             if i >= self.args.n_warmup_iter:
                 times.append(stop - start)
 
+        if self.args.report_gil_contention:
+            knocker.stop()
         if self.args.cuda_profile:
             xp.cuda.profiler.stop()
 
         self.queue.put(times)
+        if self.args.report_gil_contention:
+            self.queue.put(knocker.contention_metric)
 
     def print_backend_specific_config(self):
         delay_progress_str = (
diff --git a/ucp/benchmarks/send_recv.py b/ucp/benchmarks/send_recv.py
index ec2630b67..7a65c3436 100644
--- a/ucp/benchmarks/send_recv.py
+++ b/ucp/benchmarks/send_recv.py
@@ -97,6 +97,8 @@ def client(queue, port, server_address, args):
         client.run()
 
     times = queue.get()
+    if args.report_gil_contention:
+        contention_metric = queue.get()
 
     assert len(times) == args.n_iter
     bw_avg = format_bytes(2 * args.n_iter * args.n_bytes / sum(times))
@@ -133,6 +135,8 @@ def client(queue, port, server_address, args):
     print_key_value("Bandwidth (median)", value=f"{bw_med}/s")
     print_key_value("Latency (average)", value=f"{lat_avg} ns")
     print_key_value("Latency (median)", value=f"{lat_med} ns")
+    if args.report_gil_contention:
+        print_key_value("GIL contention", value=f"{contention_metric}")
     if not args.no_detailed_report:
         print_separator(separator="=")
         print_key_value(key="Iterations", value="Bandwidth, Latency")
@@ -297,6 +301,12 @@ def parse_args():
         help="Backend Library (-l) to use, options are: 'ucp-async' (default), "
         "'ucp-core' and 'tornado'.",
     )
+    parser.add_argument(
+        "--report-gil-contention",
+        default=False,
+        action="store_true",
+        help="Report GIL contention (requires the `gilknocker` package).",
+    )
     parser.add_argument(
         "--delay-progress",
         default=False,
@@ -337,6 +347,15 @@ def parse_args():
     if args.backend != "ucp-core" and args.delay_progress:
         raise RuntimeError("`--delay-progress` requires `--backend=ucp-core`")
 
+    if args.report_gil_contention:
+        try:
+            import gilknocker  # noqa: F401
+        except ImportError:
+            raise RuntimeError(
+                "Could not import `gilknocker`. Make sure it is installed or "
+                "remove the `--report-gil-contention` argument."
+            )
+
     return args