#!/usr/bin/env python
# Forked from ppwwyyxx/RAM-multiprocess-dataloader (main-multigpu-sharedmem.py).

import itertools
import multiprocessing as mp
import os
import time

import torch

import detectron2.utils.comm as comm
from detectron2.engine import launch

from common import MemoryMonitor, create_coco, DatasetFromList, read_sample
from serialize import TorchShmSerializedList
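
# `common` and `serialize` are local modules from this repository; judging by
# the imports, `common` provides the memory monitor, the COCO-annotation
# loader, and dataset helpers, while `serialize` provides the shared-memory
# list used below.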


def worker(_, dataset: torch.utils.data.Dataset):
    # The first argument is the process index supplied by start_processes;
    # it is unused here. Each worker iterates over the shared dataset
    # forever, simulating a dataloader worker.
    while True:
        for sample in dataset:
            # Read the data, with a fake latency to mimic real work.
            time.sleep(0.000001)
            result = read_sample(sample)


def main():
    monitor = MemoryMonitor()
    ds = DatasetFromList(TorchShmSerializedList(
        # Don't read the data except on local rank 0! Otherwise every GPU
        # worker would waste time and (maybe) RAM doing the same work.
        create_coco() if comm.get_local_rank() == 0 else []))
    print(monitor.table())
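
    # TorchShmSerializedList (from the local serialize module) is expected to
    # move the serialized samples into torch shared memory, so every process
    # on this machine reads one copy instead of duplicating it; ranks other
    # than local rank 0 attach to the memory created by rank 0.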

    # Preload torch in the forkserver so workers inherit an already-imported
    # copy of it instead of each importing torch on their own.
    mp.set_forkserver_preload(["torch"])
    ctx = torch.multiprocessing.start_processes(
        worker, (ds,), nprocs=4, join=False, daemon=True,
        start_method='forkserver')
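
    # Gather the PIDs of every launcher process and its dataloader workers
    # across ranks, so the main process can report memory usage for all of them.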
    all_pids = comm.all_gather([os.getpid()] + ctx.pids())
    all_pids = list(itertools.chain.from_iterable(all_pids))
    monitor = MemoryMonitor(all_pids)

    try:
        for k in range(100):
            # Print memory (of all processes) in the main process only.
            if comm.is_main_process():
                print(monitor.table())
            time.sleep(1)
    finally:
        ctx.join()


if __name__ == "__main__":
    mp.set_forkserver_preload(["torch"])
    num_gpus = 2
    if torch.cuda.device_count() < num_gpus:
        # We don't actually need GPUs for this demo anyway.
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    # detectron2's launch() uses "spawn" internally. To switch to forkserver,
    # modifying detectron2 source code is needed.
    launch(main, num_gpus, dist_url="auto")
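
# Running this file directly launches 2 trainer processes (CPU-only if fewer
# than 2 GPUs are visible), each with 4 dataloader workers, and prints a
# memory table covering all processes once per second for ~100 iterations.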