forked from huangmozhilv/u2net_torch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocess_taskSep.py
executable file
·115 lines (99 loc) · 5.21 KB
/
preprocess_taskSep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#### @Chao Huang([email protected]).
import warnings
# To ignore all warnings
warnings.filterwarnings("ignore")
import os
import shutil
import argparse
import json
import time
from glob2 import glob
import SimpleITK as sitk
import numpy as np
from tqdm import tqdm
import multiprocessing
import tinies
import config
import utils
tinies.sureDir(config.prepData_dir)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--nProc', type=int, default=os.cpu_count(),
                        help='process workers to create')
    parser.add_argument('--tasks', default=['Task02_Heart'], nargs='+',
                        help='Task(s) to be preprocessed')
    args = parser.parse_args()

    ###############################################################
    ## resample and crop et al.
    tasks = args.tasks
    print("core count: {}".format(args.nProc))

    for task in tqdm(tasks):
        print("Current Task: {}".format(task))
        config_task = config.set_config_task('independent', task, config.base_dir)

        def prep(files, outDir, with_gt=True):
            """Preprocess each image via utils.preprocess and save results.

            For every image path in ``files`` this writes into ``outDir``:
            ``<ID>_volumes.npy`` (stacked channel volumes), and when
            ``with_gt`` is True also ``<ID>_label.npy`` / ``<ID>_weight.npy``,
            plus ``<ID>.json`` holding the original shape and crop bounding
            box as strings (callers un-stringify with eval()).
            """
            if not files:  # nothing to do (e.g. task has no imagesTs)
                return
            print("ids[0]:{}, current time:{}".format(
                os.path.basename(files[0]), str(tinies.datestr())))
            for img_path in files:
                # Strip everything after the first '.' (e.g. '.nii.gz').
                ID = os.path.basename(img_path).split('.')[0]
                if with_gt:
                    # NOTE(review): lab_path carries no file extension here;
                    # presumably utils.preprocess resolves the label file
                    # from this stem — confirm against utils.preprocess.
                    lab_path = os.path.join(config.base_dir, task, 'labelsTr', ID)
                else:
                    lab_path = None
                volume_list, label, weight, original_shape, [bbmin, bbmax] = utils.preprocess(
                    img_path, lab_path, config_task, with_gt=with_gt)
                volumes = np.asarray(volume_list)
                np.save(os.path.join(outDir, ID + '_volumes.npy'), volumes)
                if with_gt:
                    np.save(os.path.join(outDir, ID + '_label.npy'), label)
                    np.save(os.path.join(outDir, ID + '_weight.npy'), weight)
                json_info = dict()
                json_info['original_shape'] = str(original_shape)  # use eval() to unstr
                json_info['bbox'] = str([bbmin, bbmax])  # use eval() to unstr
                with open(os.path.join(outDir, ID + '.json'), 'w') as f:
                    json.dump(json_info, f, indent=4)

        tr_files = sorted([x for x in glob(os.path.join(config.base_dir, task, 'imagesTr', '*'))
                           if '.nii.gz' in x])
        trDir = os.path.join(config.prepData_dir, task, "Tr")
        tinies.sureDir(trDir)  # make dir if not existing

        ts_files = sorted([x for x in glob(os.path.join(config.base_dir, task, 'imagesTs', '*'))
                           if '.nii.gz' in x])
        tsDir = os.path.join(config.prepData_dir, task, "Ts")
        tinies.sureDir(tsDir)  # make dir if not existing

        pool = multiprocessing.Pool(args.nProc)
        # Keep the AsyncResults: apply_async silently swallows worker
        # exceptions unless .get() is called on each result.
        async_results = [
            pool.apply_async(func=prep, args=(tr_files, trDir, True)),
            pool.apply_async(func=prep, args=(ts_files, tsDir, False)),
        ]
        pool.close()  # close pool, no more processes added to pool
        pool.join()   # wait pool to finish, required and should be after .close()
        for res in async_results:
            res.get()  # re-raise any exception that occurred in a worker

    ######################################################################################
    ## fuse cancer to organ
    ## fuse 'cancer' to Liver for Task03_Liver or to Pancreas for Task07_Pancreas
    fuseCa = False
    if fuseCa:
        print('Fusing cancer to organ...')
        tasks = ['Task03_Liver', 'Task07_Pancreas']
        for task in tqdm(tasks):
            print(task)
            config_task = config.set_config_task('independent', task, config.base_dir)

            def fuse(files, outDir, with_gt=True):
                """Relabel cancer voxels (label 2) as organ (label 1) in-place.

                Each ``*_label.npy`` in ``files`` is loaded, label 2 is fused
                into label 1, and the array is saved back to the same path.
                (``outDir``/``with_gt`` are kept for interface symmetry with
                ``prep`` but are unused.)
                """
                if not files:
                    return
                print("ids[0]:{}, current time:{}".format(
                    os.path.basename(files[0]), str(tinies.datestr())))
                for lab_path in files:
                    print('loading:{}'.format(lab_path))
                    label = np.load(lab_path)
                    label[label == 2] = 1  # cancer fused to organ
                    np.save(os.path.join(lab_path), label)

            task_prep_dir = os.path.join(config.prepData_dir, task)
            files = sorted([x for x in glob(os.path.join(task_prep_dir, 'Tr', '*'))
                            if '_label.npy' in x])
            outDir = os.path.join(task_prep_dir, "Tr")

            pool = multiprocessing.Pool(args.nProc)
            async_results = [pool.apply_async(func=fuse, args=(files, outDir, True))]
            pool.close()  # close pool, no more processes added to pool
            pool.join()   # wait pool to finish, required and should be after .close()
            for res in async_results:
                res.get()  # re-raise any exception that occurred in a worker
    else:
        print('NOT fusing cancer to organ!')