From 4ee082f88bebb6cbdc8d8617868059e949766721 Mon Sep 17 00:00:00 2001 From: Patrice Vignola Date: Sat, 4 May 2024 16:41:45 -0700 Subject: [PATCH] Make change in benchmark as well --- benchmark/python/benchmark_e2e.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/benchmark/python/benchmark_e2e.py b/benchmark/python/benchmark_e2e.py index d18f6acc9..a0d243636 100644 --- a/benchmark/python/benchmark_e2e.py +++ b/benchmark/python/benchmark_e2e.py @@ -85,6 +85,9 @@ def main(args): generator.generate_next_token() if args.print_model_output: print(tokenizer.decode(generator.get_sequence(0))) + # Delete the generator to free the captured graph for the next generator, if graph capture is enabled + del generator + tokenize_times = [] prompt_times = [] token_gen_times = [] @@ -141,6 +144,9 @@ def main(args): wall_clock_times.append(wall_clock_end_time - wall_clock_start_time) if args.print_model_output: print(tokenizer.decode(generator.get_sequence(0))) + # Delete the generator to free the captured graph for the next generator, if graph capture is enabled + del generator + # Calculate tokenization metrics avg_tokenization_latency_s = sum(tokenize_times) / len(tokenize_times) avg_tokenization_latency_ms = avg_tokenization_latency_s * 1000