forked from DeepPoolML/DeepPool
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gpuProfiler.py
52 lines (39 loc) · 1.54 KB
/
gpuProfiler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import torch
import json
from datetime import datetime, timedelta
import deeppool_bench
class GpuProfiler:
    """Measures forward/backward execution times of a layer's JIT module,
    memoizing results in a JSON file keyed by input config + inlined graph.
    """

    def __init__(self, device):
        # device: target device handle (stored only; benchmarking uses .cuda()).
        self.device = device
        # cache: key (str) -> (fwTime, bwTime). Values become lists after a
        # JSON reload; queryFwBwTime normalizes hits back to tuples.
        self.cache = {}

    def __saveProfile(self, path="gpuProfile.json"):
        """Persist the in-memory cache to *path* as JSON."""
        with open(path, "w") as outfile:
            json.dump(self.cache, outfile)

    def __loadProfile(self, path="gpuProfile.json"):
        """Replace the in-memory cache with the contents of *path*.

        A missing or unreadable file is non-fatal: a message is printed and
        the current cache is kept as-is.
        """
        try:
            with open(path, "r") as f:
                self.cache = json.load(f)
        except IOError:
            print("[GpuProfiler] No profile file exists at %s." % path)

    def queryFwBwTime(self, layer, config, autocast=False):
        """Return (fwTime, bwTime) for *layer* benchmarked at batch size
        config[0], using the on-disk profile as a memo cache.

        NOTE(review): `ips` (CUDA copies of the random inputs) is built but
        never passed to the benchmark -- `benchmodule` receives `inputs`.
        Possibly `ips` was intended; confirm against deeppool_bench before
        changing.
        """
        jitmodule = layer.scriptModule()
        inputs = layer.getRandomInputs(config[0])
        cfg = []
        ips = []
        for a in inputs:
            assert isinstance(a, torch.Tensor)
            cfg.append((a.dtype, a.shape))
            ips.append(a.cuda())
        key = f"{cfg}{layer.losslayer} || {autocast} || {jitmodule.inlined_graph}"
        # Re-read the profile so results from concurrent/previous runs are seen.
        self.__loadProfile()
        if key in self.cache:
            # JSON round-trips tuples as lists; normalize so callers always
            # receive a tuple, matching the cache-miss return type below.
            return tuple(self.cache[key])
        fwTime, bwTime = deeppool_bench.benchmodule(jitmodule._c, inputs, autocast)
        if layer.losslayer:
            # Loss layers add their own backward cost; benchmark it separately
            # against zero targets of the output's batch dimension.
            output = jitmodule.forward(*inputs).detach()
            targets = torch.zeros(output.size()[0], dtype=torch.int64).cuda()
            bwTime += deeppool_bench.benchloss(output,
                    targets, layer.losslayer, autocast)
        self.cache[key] = (fwTime, bwTime)
        self.__saveProfile()
        return (fwTime, bwTime)