diff --git a/runner/bench.py b/runner/bench.py
index 1732d9c4..c97c4f22 100644
--- a/runner/bench.py
+++ b/runner/bench.py
@@ -1,49 +1,124 @@
+"""This script benchmarks GPU memory usage and inference time for various AI pipelines.
+"""
+
 import argparse
 import os
 from time import time
-from typing import List
+from typing import List, Any
 
 import numpy as np
 import torch
 from app.main import load_pipeline
 from app.pipelines.base import Pipeline
-from app.pipelines.image_to_image import ImageToImagePipeline
-from app.pipelines.image_to_video import ImageToVideoPipeline
-from app.pipelines.text_to_image import TextToImagePipeline
 from PIL import Image
 from pydantic import BaseModel
+from pathlib import Path
+from starlette.datastructures import UploadFile
+
+CURRENT_DIR = Path(__file__).parent
 
 PROMPT = "a mountain lion"
-IMAGE = "images/test.png"
+EXAMPLE_IMAGE_PATH = Path(CURRENT_DIR, "example_data/image.png")
+EXAMPLE_IMAGE_LOW_RES_PATH = Path(CURRENT_DIR, "example_data/image-low-res.png")
+EXAMPLE_AUDIO_FILE_PATH = Path(CURRENT_DIR, "example_data/test_audio.flac")
+
+
+def create_upload_file(file_path: Path) -> UploadFile:
+    """Creates an UploadFile object from a file path.
+
+    Args:
+        file_path: The path to the file.
+
+    Returns:
+        UploadFile: The UploadFile object.
+    """
+    try:
+        return UploadFile(filename=file_path.name, file=open(file_path, "rb"))
+    except IOError as e:
+        print(f"Error opening file {file_path}: {e}")
+        raise
+
+
+def get_example_input(pipeline_name: str) -> dict:
+    """Returns example input for the specified pipeline.
+
+    Args:
+        pipeline_name: The name of the pipeline.
+
+    Returns:
+        dict: A dictionary containing example input for the specified pipeline.
+
+    Raises:
+        NotImplementedError: If example input is not implemented for the specified
+            pipeline.
+    """
+    try:
+        image_path = (
+            EXAMPLE_IMAGE_LOW_RES_PATH
+            if "UpscalePipeline" in pipeline_name
+            else EXAMPLE_IMAGE_PATH
+        )
+        example_image = Image.open(image_path).convert("RGB")
+    except IOError as e:
+        print(f"Error opening image file {image_path}: {e}")
+        raise
+
+    example_inputs = {
+        "AudioToTextPipeline": {"audio": create_upload_file(EXAMPLE_AUDIO_FILE_PATH)},
+        "TextToImagePipeline": {"prompt": PROMPT},
+        "ImageToImagePipeline": {"prompt": PROMPT, "image": example_image},
+        "ImageToVideoPipeline": {"image": example_image},
+        "UpscalePipeline": {"prompt": PROMPT, "image": example_image},
+        "SegmentAnything2Pipeline": {"image": example_image},
+    }
+
+    if pipeline_name not in example_inputs:
+        raise NotImplementedError(
+            f"Example input not implemented for this pipeline: {pipeline_name}"
+        )
+
+    return example_inputs[pipeline_name]
 
 
 class BenchMetrics(BaseModel):
+    """A class to store benchmarking metrics."""
+
     inference_time: float
-    inference_time_per_output: float
     max_mem_allocated: float
     max_mem_reserved: float
 
 
-def call_pipeline(pipeline: Pipeline, batch_size=1, **kwargs) -> List[any]:
-    if isinstance(pipeline, TextToImagePipeline):
-        prompts = [PROMPT] * batch_size
-        return pipeline(prompts, **kwargs)
-    elif isinstance(pipeline, ImageToImagePipeline):
-        prompts = [PROMPT] * batch_size
-        images = [Image.open(IMAGE).convert("RGB")] * batch_size
-        return pipeline(prompts, images, **kwargs)
-    elif isinstance(pipeline, ImageToVideoPipeline):
-        images = [Image.open(IMAGE).convert("RGB")] * batch_size
-        return pipeline(images, **kwargs)
-    else:
-        raise Exception("invalid pipeline")
+def call_pipeline(pipeline: Pipeline, **kwargs) -> List[Any]:
+    """Calls a pipeline with example inputs.
+
+    Args:
+        pipeline: The pipeline to call.
+        **kwargs: Additional keyword arguments to pass to the pipeline.
+
+    Returns:
+        List: The output of the pipeline.
+    """
+    example_kwargs = get_example_input(pipeline.__class__.__name__)
+    kwargs.update(example_kwargs)
+    return pipeline(**kwargs)
 
 
 def bench_pipeline(
-    pipeline: Pipeline, batch_size=1, runs=1, num_inference_steps=None
+    pipeline: Pipeline,
+    runs: int = 1,
+    num_inference_steps: int = None,
 ) -> BenchMetrics:
+    """Benchmarks a pipeline by calling it multiple times and collecting metrics.
+
+    Args:
+        pipeline: The pipeline to benchmark.
+        runs: The number of times to call the pipeline.
+        num_inference_steps: The number of inference steps to run for the pipeline.
+
+    Returns:
+        BenchMetrics: The benchmarking metrics.
+    """
     inference_time = np.zeros(runs)
-    inference_time_per_output = np.zeros(runs)
     max_mem_allocated = np.zeros(runs)
     max_mem_reserved = np.zeros(runs)
 
@@ -55,39 +130,28 @@ def bench_pipeline(
 
     for i in range(runs):
         start = time()
-        output = call_pipeline(pipeline, batch_size, **kwargs)
+        output = call_pipeline(pipeline, **kwargs)
         if isinstance(output, tuple):
            output = output[0]
-        assert len(output) == batch_size
 
         inference_time[i] = time() - start
-        inference_time_per_output[i] = inference_time[i] / batch_size
         max_mem_allocated[i] = torch.cuda.max_memory_allocated() / 1024**3
         max_mem_reserved[i] = torch.cuda.max_memory_reserved() / 1024**3
 
-        print(f"inference {i} {batch_size=} time: {inference_time[i]:.3f}s")
-        print(
-            f"inference {i} {batch_size=} time per output: "
-            f"{inference_time_per_output[i]:.3f}s"
-        )
-        print(
-            f"inference {i} {batch_size=} max GPU memory allocated: "
-            f"{max_mem_allocated[i]:.3f}GiB"
-        )
+        print(f"inference {i+1} time: {inference_time[i]:.3f}s")
         print(
-            f"inference {i} {batch_size=} max GPU memory reserved: "
-            f"{max_mem_reserved[i]:.3f}GiB"
+            f"inference {i+1} max GPU memory allocated: {max_mem_allocated[i]:.3f}GiB"
        )
+        print(f"inference {i+1} max GPU memory reserved: {max_mem_reserved[i]:.3f}GiB")
 
     return BenchMetrics(
         inference_time=inference_time.mean(),
-        inference_time_per_output=inference_time_per_output.mean(),
        max_mem_allocated=max_mem_allocated.mean(),
        max_mem_reserved=max_mem_reserved.mean(),
    )
 
 
-if __name__ == "__main__":
+def main():
     parser = argparse.ArgumentParser(description="A benchmarking tool for AI pipelines")
     parser.add_argument(
         "--pipeline", type=str, required=True, help="the name of the pipeline"
@@ -110,38 +174,35 @@ def bench_pipeline(
         type=int,
         default=None,
         required=False,
-        help="the number of inference steps to run for the pipeline",
-    )
-    parser.add_argument(
-        "--batch_size", type=int, default=1, required=False, help="the size of a batch"
+        help=(
+            "the number of inference steps to run for the pipeline. Not all pipelines "
+            "support this."
+        ),
     )
-
     args = parser.parse_args()
 
-    print(
-        f"{args.pipeline=} {args.model_id=} {args.runs=} {args.batch_size=} "
-        f"{args.num_inference_steps=}"
-    )
+    print("Starting benchmark...")
+    args_dict = vars(args)
+    print_parts = [
+        f"{key}={value}" for key, value in args_dict.items() if value is not None
+    ]
+    print(", ".join(print_parts))
 
     start = time()
     pipeline = load_pipeline(args.pipeline, args.model_id)
 
-    # Collect pipeline load metrics
+    # Collect pipeline load metrics.
     load_time = time() - start
     load_max_mem_allocated = torch.cuda.max_memory_allocated() / 1024**3
     load_max_mem_reserved = torch.cuda.max_memory_reserved() / 1024**3
 
-    # Collect pipeline warmup metrics if stable-fast is enabled
+    # Collect pipeline warmup metrics if stable-fast is enabled.
     if os.getenv("SFAST", "").strip().lower() == "true":
         warmups = 3
-        warmup_metrics = bench_pipeline(
-            pipeline, args.batch_size, warmups, args.num_inference_steps
-        )
+        warmup_metrics = bench_pipeline(pipeline, warmups, args.num_inference_steps)
 
-    # Collect pipeline inference metrics
-    metrics = bench_pipeline(
-        pipeline, args.batch_size, args.runs, args.num_inference_steps
-    )
+    # Collect pipeline inference metrics.
+    metrics = bench_pipeline(pipeline, args.runs, args.num_inference_steps)
 
     print("\n")
     print("----AGGREGATE METRICS----")
@@ -153,10 +214,6 @@ def bench_pipeline(
 
     if os.getenv("SFAST", "").strip().lower() == "true":
         print(f"avg warmup inference time: {warmup_metrics.inference_time:.3f}s")
-        print(
-            f"avg warmup inference time per output: "
-            f"{warmup_metrics.inference_time_per_output:.3f}s"
-        )
         print(
             f"avg warmup inference max GPU memory allocated: "
             f"{warmup_metrics.max_mem_allocated:.3f}GiB"
@@ -167,6 +224,9 @@ def bench_pipeline(
         )
 
     print(f"avg inference time: {metrics.inference_time:.3f}s")
-    print(f"avg inference time per output: {metrics.inference_time_per_output:.3f}s")
     print(f"avg inference max GPU memory allocated: {metrics.max_mem_allocated:.3f}GiB")
     print(f"avg inference max GPU memory reserved: {metrics.max_mem_reserved:.3f}GiB")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/runner/example_data/image-low-res.png b/runner/example_data/image-low-res.png
new file mode 100644
index 00000000..9ab36fba
Binary files /dev/null and b/runner/example_data/image-low-res.png differ
diff --git a/runner/images/test.png b/runner/example_data/image.png
similarity index 100%
rename from runner/images/test.png
rename to runner/example_data/image.png
diff --git a/runner/example_data/test_audio.flac b/runner/example_data/test_audio.flac
new file mode 100644
index 00000000..7a2b8ef0
Binary files /dev/null and b/runner/example_data/test_audio.flac differ
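A usage sketch follows (assumptions: it runs from the runner/ directory so that app and bench are importable, a CUDA device is present because the script reads torch.cuda memory counters, and the pipeline and model identifiers are illustrative placeholders rather than values prescribed by this patch). It mirrors what main() does for a single benchmark run:

# bench_example.py, a hypothetical snippet that is not part of the patch.
from app.main import load_pipeline
from bench import bench_pipeline

# Placeholders: pass whatever identifiers load_pipeline accepts in your deployment.
pipeline = load_pipeline("text-to-image", "stabilityai/sd-turbo")

# Three timed calls; prompts and images come from runner/example_data via
# get_example_input, which is keyed on the pipeline's class name.
metrics = bench_pipeline(pipeline, runs=3, num_inference_steps=30)

print(f"avg inference time: {metrics.inference_time:.3f}s")
print(f"avg max GPU memory allocated: {metrics.max_mem_allocated:.3f}GiB")
print(f"avg max GPU memory reserved: {metrics.max_mem_reserved:.3f}GiB")

The equivalent CLI invocation would be python bench.py --pipeline <name> --model_id <id> --runs 3, optionally with --num_inference_steps for pipelines that support it and SFAST=true in the environment if the stable-fast warmup pass should be measured as well.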
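Because get_example_input raises NotImplementedError for unregistered pipelines, covering an additional pipeline amounts to one more entry in the example_inputs dictionary. Below is a minimal, self-contained sketch of that lookup-by-class-name pattern; the TextToSpeechPipeline name and its text field are invented for illustration and are not part of this patch:

# Standalone sketch of the example-input registry pattern used in bench.py.
# "TextToSpeechPipeline" and its "text" parameter are hypothetical.
PROMPT = "a mountain lion"

EXAMPLE_INPUTS = {
    "TextToImagePipeline": {"prompt": PROMPT},
    "TextToSpeechPipeline": {"text": PROMPT},  # hypothetical new entry
}


def get_example_input(pipeline_name: str) -> dict:
    if pipeline_name not in EXAMPLE_INPUTS:
        raise NotImplementedError(
            f"Example input not implemented for this pipeline: {pipeline_name}"
        )
    return EXAMPLE_INPUTS[pipeline_name]


print(get_example_input("TextToSpeechPipeline"))  # {'text': 'a mountain lion'}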