profile.py
#%%
import os, subprocess
import numpy as np
# Pin this process to the GPU that nvidia-smi reports as having the most free memory.
os.environ["CUDA_VISIBLE_DEVICES"] = str(np.argmax(
    [int(x.split()[2]) for x in subprocess.Popen(
        "nvidia-smi -q -d Memory | grep -A4 GPU | grep Free",
        shell=True, stdout=subprocess.PIPE).stdout.readlines()]))
import torch
import torch.nn as nn
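# Deterministic cuDNN kernels and no autotuning, so repeated timing runs stay comparable.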
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
from Experimental_root.scripts.profiler import MyTimeit, MyFlops
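# MyTimeit / MyFlops are repo-local profiler wrappers: each wraps a callable and reports its runtime or FLOPs.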
def main(test_name, inp, device, opt):
    """Profile a model or callable: warm-up forward pass, timed run, and peak GPU memory."""
    inp = inp.to(device)
    if isinstance(test_name, nn.Module):
        test_name = test_name.to(device)
    print('size of tensor ' + str(inp.shape))
    print('use device', device)

    # Warm-up forward pass; also reports the output shape.
    with torch.no_grad():
        out = test_name(inp)
    if isinstance(out, torch.Tensor):
        print('output shape is', out.shape)
    elif isinstance(out, list):
        print('output shape is', out[-1].shape)
    del out
    torch.cuda.empty_cache()

    if isinstance(test_name, nn.Module):
        test_name.eval()
    print('size of tensor ' + str(inp.shape))
    print('use device', device)
    with torch.no_grad():
        # Alternative profilers (per-line timing, FLOP counting, torch profiler trace):
        # for profiler in (MyTimeit('line'),):
        # for profiler in (MyFlops(mode='thop'),):
        # for profiler in (MyTimeit('torchprofile19', path='./torchprofiler/'+opt['name']),):
        for profiler in (MyTimeit('time'),
                         # MyFlops(mode='ptflops')
                         ):
            print('\n test function name: ' + str(test_name.__class__))
            new_function = profiler(test_name)
            out = new_function(inp)
            del out
            torch.cuda.empty_cache()
            max_mem = torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024
            print("max memory required \t\t %.2fGB" % max_mem)
            print("*****************************************************************************")
# %%
if __name__ == '__main__':
    import yaml
    import os.path as osp
    from basicsr.utils.options import ordered_yaml
    from basicsr.models import build_model

    root_path = osp.abspath(osp.join(__file__, osp.pardir))
    # Relative path: run this script from the repository root so the config file resolves.
    test_path = "./options/test/bsvd_c64.yml"
    with open(test_path, mode='r') as f:
        opt = yaml.load(f, Loader=ordered_yaml()[0])
    opt['dist'] = False
    opt['is_train'] = False
    opt['root_path'] = root_path

    # Dummy input clip: (batch, temp_psz frames, 4 channels, 540, 960).
    temp_psz = 10
    inp = torch.randn(1, temp_psz, 4, 540, 960).cuda()
    opt['val']['temp_psz'] = temp_psz
    device = torch.device("cuda")
    print("device", device)

    model = build_model(opt)
    print(model)
    # Profile the generator network in half precision under autocast.
    net_g_forward = model.net_g.half()
    net_g_forward.eval()
    with torch.cuda.amp.autocast(True):
        main(net_g_forward, inp, device, opt)