From 4ee082f88bebb6cbdc8d8617868059e949766721 Mon Sep 17 00:00:00 2001
From: Patrice Vignola <vignola.patrice@gmail.com>
Date: Sat, 4 May 2024 16:41:45 -0700
Subject: [PATCH] Delete generator after each run in benchmark_e2e.py as well, to free the captured graph

---
 benchmark/python/benchmark_e2e.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/benchmark/python/benchmark_e2e.py b/benchmark/python/benchmark_e2e.py
index d18f6acc9..a0d243636 100644
--- a/benchmark/python/benchmark_e2e.py
+++ b/benchmark/python/benchmark_e2e.py
@@ -85,6 +85,9 @@ def main(args):
             generator.generate_next_token()
         if args.print_model_output: print(tokenizer.decode(generator.get_sequence(0)))
 
+        # Delete the generator to free the captured graph for the next generator, if graph capture is enabled
+        del generator
+
     tokenize_times = []
     prompt_times = []
     token_gen_times = []
@@ -141,6 +144,9 @@ def main(args):
         wall_clock_times.append(wall_clock_end_time - wall_clock_start_time)
         if args.print_model_output: print(tokenizer.decode(generator.get_sequence(0)))
 
+        # Delete the generator to free the captured graph for the next generator, if graph capture is enabled
+        del generator
+
     # Calculate tokenization metrics
     avg_tokenization_latency_s = sum(tokenize_times) / len(tokenize_times)
     avg_tokenization_latency_ms = avg_tokenization_latency_s * 1000