jacobgil · TRex22 · Nov 5, 2022 · Nov 5, 2022 · Nov 5, 2022 · Nov 5, 2022
diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py
@@ -0,0 +1,131 @@
+import argparse
+import cv2
+import numpy as np
+import torch
+import time
+import tqdm
+
+from pytorch_grad_cam import GradCAM
+
+from torch import nn
+import torch.nn.functional as F
+
+import torchvision # You may need to install separately
+from torchvision import models
+
+from torch.profiler import profile, record_function, ProfilerActivity
+
+# Simple model to test
+class SimpleCNN(nn.Module):
+    """Small convolutional network used as a lightweight benchmark target.
+
+    One of its convolutions is exposed under several attribute names
+    (``target_layer``, ``target_layers``, ``layer4``) and the whole stack is
+    also reachable as ``features``, so layer-lookup code written for other
+    architectures can find a target layer here too.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        # Grad-CAM interface: this convolution is created first and then
+        # shared (same module object) with the sequential stack below.
+        shared_conv = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
+        self.target_layer = shared_conv
+        self.target_layers = [shared_conv]
+        self.layer4 = shared_conv
+
+        stack = [
+            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
+            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
+            nn.ReLU(inplace=True),
+            shared_conv,
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 2)),
+            nn.Flatten(),
+            # 122880 flattened features match the 3x256x60 inputs used by
+            # the benchmark (32 channels * 128 * 30 after pooling).
+            nn.Linear(122880, 10),
+            nn.Linear(10, 1),
+        ]
+        self.cnn_stack = nn.Sequential(*stack)
+        self.features = self.cnn_stack
+
+    def forward(self, x):
+        """Run the stack on ``x`` and normalise the result along dim 0."""
+        return F.normalize(self.cnn_stack(x), dim=0)
+
+def xavier_uniform_init(layer):
+    """Initialise a Linear/Conv2d layer with Xavier-uniform weights.
+
+    Intended for use with ``model.apply``. Weights are drawn with the gain
+    computed for ReLU and an existing bias is zeroed; any other module type
+    is left untouched.
+
+    Args:
+        layer: Module visited by ``model.apply``.
+    """
+    # isinstance (rather than an exact type comparison) also covers
+    # subclasses of nn.Linear / nn.Conv2d.
+    if not isinstance(layer, (nn.Linear, nn.Conv2d)):
+        return
+
+    if layer.bias is not None:
+        nn.init.zeros_(layer.bias)
+
+    gain = nn.init.calculate_gain('relu')
+    nn.init.xavier_uniform_(layer.weight, gain=gain)
+
+def last_cnn_layer(model):
+    """Pick the layer of ``model`` to use as the CAM target.
+
+    Lookup order:
+      1. ``model.layer4`` if present,
+      2. ``model.conv3`` if present,
+      3. the last ``nn.Conv2d`` found anywhere inside ``model.features``
+         (nested containers are searched as well).
+
+    Args:
+        model: Network to inspect.
+
+    Returns:
+        The selected layer, or ``None`` when nothing suitable is found.
+    """
+    for attribute in ('layer4', 'conv3'):
+        if hasattr(model, attribute):
+            return getattr(model, attribute)
+
+    features = getattr(model, 'features', None)
+    if features is None:
+        return None
+
+    # modules() walks nested blocks in registration order; a plain iterable
+    # of layers is scanned as given.
+    candidates = features.modules() if isinstance(features, nn.Module) else features
+
+    last_conv = None
+    for candidate in candidates:
+        if isinstance(candidate, nn.Conv2d):
+            # Keep overwriting so the final match (the last conv) wins
+            last_conv = candidate
+
+    return last_conv
+
+def save_image(image, path):
+    """Write ``image`` to ``path`` using ``torchvision.utils.save_image``.
+
+    A leading ``~`` in a string or path-like ``path`` is expanded to the
+    user's home directory first, because the benchmark scripts pass paths
+    such as ``'~/threshold.png'``. Any other target (for example an open
+    file object) is passed through unchanged.
+
+    Args:
+        image: Tensor to save.
+        path: Destination path or file object.
+
+    Returns:
+        Whatever ``torchvision.utils.save_image`` returns.
+    """
+    import os  # local import keeps this helper self-contained
+
+    if isinstance(path, (str, os.PathLike)):
+        path = os.path.expanduser(path)
+
+    return torchvision.utils.save_image(image, path)
+
+# Code to run benchmark
+def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False, progress_bar=True, method=GradCAM, input_image=None):
+    """Time a CAM method over batches of random inputs.
+
+    Args:
+        model: Network to explain; it is moved to the selected device.
+        number_of_inputs: Total number of random samples to generate.
+        batch_size: Number of samples handed to the CAM method per step.
+        use_cuda: Run on ``cuda:0`` when true, otherwise on the CPU.
+        workflow_test: Also time a threshold-and-mask step on every heatmap.
+        progress_bar: Show a tqdm progress bar while running.
+        method: CAM class to instantiate (defaults to ``GradCAM``).
+        input_image: Optional fixed batch used for every step instead of
+            slices of the random input tensor.
+
+    Returns:
+        ``[min_time, max_time, avg_time, [threshold_plot, output_image]]``.
+        ``min_time`` and ``max_time`` are wall-clock seconds per step, while
+        ``avg_time`` is the total time divided by ``number_of_inputs``. The
+        two tensors are random placeholders unless ``workflow_test`` is set.
+
+    Raises:
+        ValueError: If ``number_of_inputs`` or ``batch_size`` is below 1.
+    """
+    if number_of_inputs < 1 or batch_size < 1:
+        raise ValueError('number_of_inputs and batch_size must both be at least 1')
+
+    dev = torch.device('cuda:0') if use_cuda else torch.device('cpu')
+
+    # TODO: Use real data?
+    # TODO: Configurable dimensions?
+
+    # Some defaults I use in research code
+    input_shape = (number_of_inputs, 3, 256, 60)
+    input_tensor = torch.rand(input_shape)
+    targets = None  # [ClassifierOutputTarget(None)]
+
+    # Placeholders returned when workflow_test is off. They are allocated
+    # once, up front, so their cost does not leak into the measured time.
+    threshold_plot = torch.rand(input_shape)
+    output_image = torch.rand(input_shape)
+
+    model.to(dev)
+    target_layers = [last_cnn_layer(model)]
+
+    cam_function = method(model=model, target_layers=target_layers, cuda_device=dev, use_cuda=use_cuda)
+    cam_function.batch_size = batch_size
+
+    min_time = float('inf')
+    max_time = 0.0
+    sum_of_times = 0.0
+
+    # disable= keeps the bar silent when it was not asked for; the context
+    # manager closes it on the way out.
+    with tqdm.tqdm(total=number_of_inputs, disable=not progress_bar) as pbar:
+        for i in range(0, number_of_inputs, batch_size):
+            start_time = time.perf_counter()
+
+            # Actual code to benchmark
+            # A separate local is used so the ``input_image`` argument is not
+            # overwritten: each step gets its own slice of the random data.
+            if input_image is None:
+                batch = input_tensor[i:i + batch_size]
+            else:
+                batch = input_image
+            batch = batch.to(dev)
+
+            heatmap = cam_function(input_tensor=batch, targets=targets)
+
+            if workflow_test:
+                for j in range(heatmap.shape[0]):
+                    # Create a binary map on the same device as the batch
+                    heat = torch.tensor(heatmap[j], device=dev)
+                    threshold_plot = torch.where(heat > 0.5, 1, 0)
+                    output_image = batch * threshold_plot
+
+            if use_cuda:
+                # CUDA kernels run asynchronously; wait for them to finish
+                # before reading the clock.
+                torch.cuda.synchronize(dev)
+
+            time_difference = time.perf_counter() - start_time
+
+            sum_of_times += time_difference
+            max_time = max(max_time, time_difference)
+            min_time = min(min_time, time_difference)
+
+            # The final batch may be shorter than batch_size
+            pbar.update(min(batch_size, number_of_inputs - i))
+
+    avg_time = sum_of_times / number_of_inputs
+    return [min_time, max_time, avg_time, [threshold_plot, output_image]]
diff --git a/benchmarks/methods_benchmark.py b/benchmarks/methods_benchmark.py
@@ -0,0 +1,64 @@
+import argparse
+import cv2
+import numpy as np
+import torch
+import time
+import tqdm
+
+from pytorch_grad_cam import GradCAM, \
+ ScoreCAM, \
+ GradCAMPlusPlus, \
+ AblationCAM, \
+ XGradCAM, \
+ EigenCAM, \
+ EigenGradCAM, \
+ LayerCAM, \
+ FullGrad
+
+from torch import nn
+import torch.nn.functional as F
+
+import torchvision # You may need to install separately
+from torchvision import models
+
+from torch.profiler import profile, record_function, ProfilerActivity
+
+import benchmark_functions
+
+# Benchmark script: times every CAM method on the same SimpleCNN, once on the
+# CPU and (when a CUDA device is present) once on the GPU.
+number_of_inputs = 1000
+
+print(f'Benchmarking multiple CAM methods using {number_of_inputs} images...')
+
+methods_to_benchmark = [
+    ['GradCAM', GradCAM],
+    ['ScoreCAM', ScoreCAM],
+    ['GradCAMPlusPlus', GradCAMPlusPlus],
+    ['AblationCAM', AblationCAM],
+    ['XGradCAM', XGradCAM],
+    ['EigenCAM', EigenCAM],
+    ['EigenGradCAM', EigenGradCAM],
+    ['LayerCAM', LayerCAM],
+    ['FullGrad', FullGrad],
+]
+
+model = benchmark_functions.SimpleCNN()
+# model = models.resnet18()
+
+model.apply(benchmark_functions.xavier_uniform_init)  # Randomise more weights
+
+# Options shared by the CPU and CUDA runs
+run_options = dict(batch_size=8, workflow_test=True, progress_bar=False)
+
+# Only attempt the GPU pass when a CUDA device is actually present
+cuda_available = torch.cuda.is_available()
+
+for method_name, method in tqdm.tqdm(methods_to_benchmark):
+    print('==============================================================================\n\n')
+    print(f'Simple Workflow for method #{method_name}:\n')
+
+    cpu_min_time, cpu_max_time, cpu_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, use_cuda=False, method=method, **run_options)
+
+    if cuda_available:
+        cuda_min_time, cuda_max_time, cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, use_cuda=True, method=method, **run_options)
+
+        print(f'Cuda Min time: {cuda_min_time}\n')
+        print(f'Cuda Max time: {cuda_max_time}\n')
+        print(f'Cuda Avg time: {cuda_avg_time}\n\n')
+    else:
+        print('Cuda is not available, skipping the Cuda run.\n\n')
+
+    print(f'CPU Min time: {cpu_min_time}\n')
+    print(f'CPU Max time: {cpu_max_time}\n')
+    print(f'CPU Avg time: {cpu_avg_time}\n')
+
+print('==============================================================================\n\n')
+print('Done!')
diff --git a/benchmarks/models_benchmark.py b/benchmarks/models_benchmark.py
@@ -0,0 +1,53 @@
+import argparse
+import cv2
+import numpy as np
+import torch
+import time
+import tqdm
+
+from pytorch_grad_cam import GradCAM
+
+from torch import nn
+import torch.nn.functional as F
+
+import torchvision # You may need to install separately
+from torchvision import models
+
+from torch.profiler import profile, record_function, ProfilerActivity
+
+import benchmark_functions
+
+# Benchmark script: times GradCAM on several architectures, once on the CPU
+# and (when a CUDA device is present) once on the GPU.
+number_of_inputs = 1000
+
+print(f'Benchmarking GradCAM using {number_of_inputs} images for multiple models...')
+
+# Constructors rather than instances: each network is built only when its
+# turn comes, so the whole list never has to sit in memory at the same time.
+models_to_benchmark = [
+    ["SimpleCNN", benchmark_functions.SimpleCNN],
+    ["resnet18", models.resnet18],
+    ["resnet34", models.resnet34],
+    ["resnet50", models.resnet50],
+    ["alexnet", models.alexnet],
+    ["vgg16", models.vgg16],
+    ["googlenet", models.googlenet],
+    ["mobilenet_v2", models.mobilenet_v2],
+    ["densenet161", models.densenet161],
+]
+
+# Options shared by the CPU and CUDA runs
+run_options = dict(batch_size=8, workflow_test=True, progress_bar=False)
+
+# Only attempt the GPU pass when a CUDA device is actually present
+cuda_available = torch.cuda.is_available()
+
+for model_name, build_model in tqdm.tqdm(models_to_benchmark):
+    print('==============================================================================\n\n')
+    print(f'Simple Workflow for model #{model_name}:\n')
+
+    model = build_model()
+    model.apply(benchmark_functions.xavier_uniform_init)  # Randomise more weights
+    cpu_min_time, cpu_max_time, cpu_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, use_cuda=False, **run_options)
+
+    if cuda_available:
+        cuda_min_time, cuda_max_time, cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, use_cuda=True, **run_options)
+
+        print(f'Cuda Min time: {cuda_min_time}\n')
+        print(f'Cuda Max time: {cuda_max_time}\n')
+        print(f'Cuda Avg time: {cuda_avg_time}\n\n')
+    else:
+        print('Cuda is not available, skipping the Cuda run.\n\n')
+
+    print(f'CPU Min time: {cpu_min_time}\n')
+    print(f'CPU Max time: {cpu_max_time}\n')
+    print(f'CPU Avg time: {cpu_avg_time}\n')
+
+
+print('==============================================================================\n\n')
+print('Done!')
diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py
@@ -0,0 +1,131 @@
+import argparse
+import cv2
+import numpy as np
+import torch
+import time
+import tqdm
+
+from pytorch_grad_cam import GradCAM, \
+ ScoreCAM, \
+ GradCAMPlusPlus, \
+ AblationCAM, \
+ XGradCAM, \
+ EigenCAM, \
+ EigenGradCAM, \
+ LayerCAM, \
+ FullGrad
+
+from torch import nn
+import torch.nn.functional as F
+
+import torchvision # You may need to install separately
+from torchvision import models
+
+from torch.profiler import profile, record_function, ProfilerActivity
+
+import benchmark_functions
+
+# Benchmark script: profiles and times GradCAM on CUDA for ResNet50 and for
+# the SimpleCNN, then writes the last thresholded heat-map to disk.
+# The CPU variants of these runs are disabled; call run_gradcam with
+# use_cuda=False to bring them back.
+import os  # used to expand the '~' in the hard-coded paths below
+
+number_of_inputs = 1
+
+# Every run below targets the GPU, so stop early with a clear message
+if not torch.cuda.is_available():
+    raise SystemExit('This benchmark requires a CUDA device.')
+
+SEPARATOR = '==============================================================================\n\n'
+
+
+def profile_cuda_run(model, **run_options):
+    """Run ``run_gradcam`` on CUDA under the profiler and return its summary table."""
+    # Record CUDA activity as well as CPU activity, since the work runs on the GPU
+    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], profile_memory=True, record_shapes=True) as prof:
+        benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, **run_options)
+    return prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15)
+
+
+def print_profile(title, table):
+    """Print one profiler table under a separator and a title line."""
+    print(SEPARATOR)
+    print(title)
+    print(table)
+
+
+def print_timings(title, min_time, max_time, avg_time):
+    """Print one min/max/avg timing section under a separator and a title line."""
+    print(SEPARATOR)
+    print(title)
+    print(f'Min time: {min_time}\n')
+    print(f'Max time: {max_time}\n')
+    print(f'Avg time: {avg_time}\n')
+
+
+model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
+
+# Just hard-coding a path for now
+image_path = os.path.expanduser('~/image.jpg')
+# NOTE(review): input_tensor is loaded but never handed to run_gradcam, so
+# every run below still uses random data - confirm whether that is intended.
+input_tensor = torchvision.io.read_image(image_path)
+
+print(f'Benchmarking GradCAM using {number_of_inputs} image for ResNet50...')
+
+# Run on CUDA with profiler (save the profile to print later)
+print('Profile list of images on Cuda...')
+cuda_profile = profile_cuda_run(model)
+
+# Run on CUDA with extra workflow
+print('Profile list of images on Cuda and then run workflow...')
+work_flow_cuda_profile = profile_cuda_run(model, workflow_test=True)
+
+# Run on CUDA with extra workflow, using the simple CNN
+print('Profile list of images on Cuda and then run workflow with a simple CNN...')
+model = benchmark_functions.SimpleCNN()
+model.apply(benchmark_functions.xavier_uniform_init)  # Randomise more weights
+simple_work_flow_cuda_profile = profile_cuda_run(model, workflow_test=True)
+
+model = models.resnet50()
+
+# Run on CUDA (get min, max, and avg times)
+print('Run list of images on Cuda...')
+cuda_min_time, cuda_max_time, cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True)
+
+# Run Workflow
+print('Run list of images on Cuda with a workflow...')
+workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True)
+
+print('Run list of images on Cuda with a workflow using simple CNN...')
+model = benchmark_functions.SimpleCNN()
+model.apply(benchmark_functions.xavier_uniform_init)  # Randomise more weights
+simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time, output = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True)
+
+print('Complete!')
+
+print_profile('Cuda Profile:\n', cuda_profile)
+print_profile('Workflow Cuda Profile:\n', work_flow_cuda_profile)
+print_profile('Simple Workflow Cuda Profile:\n', simple_work_flow_cuda_profile)
+
+print_timings('Cuda Timing (No Profiler):\n', cuda_min_time, cuda_max_time, cuda_avg_time)
+print_timings('Workflow Cuda Timing (No Profiler):\n', workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time)
+print_timings('Simple Workflow Cuda Timing (No Profiler):\n', simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time)
+
+print(SEPARATOR)
+print('Output the resultant heat-map')
+threshold_plot, output_image = output
+
+# Keep the tensors as floats: casting to uint8 here would truncate the
+# fractional values of the masked image to zero before it is written.
+benchmark_functions.save_image(threshold_plot.to("cpu", torch.float32), os.path.expanduser('~/threshold.png'))
+benchmark_functions.save_image(output_image.to("cpu", torch.float32), os.path.expanduser('~/output_image.png'))
+
+print(SEPARATOR)
+print('Done!')