-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_raw.py
79 lines (64 loc) · 3.54 KB
/
parse_raw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import sys
import argparse
import os
from glob import glob
import json
import SimpleITK as sitk
# import project utilities and enums
from utils.logger import logger
from utils.dataset import read_raw
from enums.dtype import DataTypes
# File-name suffixes of the raw volumes and extension of the exported files.
RAW_EXTENSION = ".img"
NIFTI_EXTENSION = ".nii.gz"
EXHALE_SUFFIX = "eBHCT" + RAW_EXTENSION
INHALE_SUFFIX = "iBHCT" + RAW_EXTENSION
# Split directory names that are stripped to locate description.json.
SPLIT_NAMES = ("train", "test")


def normalize_path(path):
    """Return *path* with forward slashes and without trailing separators."""
    unified = path.replace('\\', '/')
    # Keep a path made only of separators (e.g. "/") intact.
    return unified.rstrip('/') or unified


def find_volumes(dataset_path, suffix):
    """Return the sorted, forward-slash paths under *dataset_path* ending in *suffix*."""
    # NOTE(review): "***" is not the recursive "**" wildcard, so this presumably
    # only matches files exactly one directory below dataset_path -- confirm
    # that this is intended before changing the pattern.
    pattern = os.path.join(dataset_path, "***", f"*{suffix}")
    return [path.replace('\\', '/') for path in sorted(glob(pattern, recursive=True))]


def subject_of(volume_path):
    """Return the subject name, i.e. the name of the directory holding *volume_path*."""
    return volume_path.split('/')[-2]


def split_name(dataset_path):
    """Return the last component of *dataset_path*, used as key into description.json."""
    return normalize_path(dataset_path).split('/')[-1]


def description_path(dataset_path):
    """Return the path of description.json belonging to *dataset_path*.

    When the last component of *dataset_path* is a split directory
    ("train" or "test"), the file is looked up next to that directory;
    otherwise it is looked up inside *dataset_path* itself.
    """
    root = normalize_path(dataset_path)
    # Only strip a trailing split component; never touch "train"/"test"
    # substrings that happen to occur elsewhere in the path.
    if os.path.basename(root) in SPLIT_NAMES:
        root = os.path.dirname(root)
    return os.path.join(root, 'description.json')


def nifti_path(volume_path):
    """Return *volume_path* with its trailing '.img' extension replaced by '.nii.gz'."""
    if volume_path.endswith(RAW_EXTENSION):
        return volume_path[:-len(RAW_EXTENSION)] + NIFTI_EXTENSION
    return volume_path + NIFTI_EXTENSION


def pair_by_subject(exhale_volumes, inhale_volumes):
    """Pair exhale and inhale volumes position by position.

    Raises:
        ValueError: if the two lists differ in length or a pair does not
            belong to the same subject directory.
    """
    if len(exhale_volumes) != len(inhale_volumes):
        raise ValueError(
            f"Found {len(exhale_volumes)} exhale volumes but "
            f"{len(inhale_volumes)} inhale volumes"
        )
    pairs = list(zip(exhale_volumes, inhale_volumes))
    for exhale_volume, inhale_volume in pairs:
        if subject_of(exhale_volume) != subject_of(inhale_volume):
            raise ValueError(
                f"Exhale volume '{exhale_volume}' and inhale volume "
                f"'{inhale_volume}' belong to different subjects"
            )
    return pairs


def load_volume(volume_path, subject_information, sitk_pixel_type):
    """Read one raw volume via read_raw using the subject's description entry."""
    return read_raw(
        binary_file_name=volume_path,
        image_size=subject_information['image_dim'],
        sitk_pixel_type=sitk_pixel_type,
        image_spacing=subject_information['voxel_dim'],
        image_origin=subject_information['origin'],
        big_endian=False,
    )


def main():
    """Convert every raw exhale/inhale volume pair of a dataset split to NIfTI.

    Reads ``--dataset_path`` from the command line, collects the raw
    ``*eBHCT.img`` / ``*iBHCT.img`` files, looks up each subject's image
    size, spacing and origin in description.json and writes a ``.nii.gz``
    file next to every raw volume. Exits with status 1 when the dataset
    path does not exist, the volumes cannot be paired, or a subject has no
    entry in description.json.
    """
    # optional arguments from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_path', type=str, default='dataset/train', help='root dir for raw training data')
    args = parser.parse_args()

    # check if the dataset_path exists
    if not os.path.exists(args.dataset_path):
        logger.error(f"Path {args.dataset_path} does not exist")
        sys.exit(1)

    # get the list of exhale and inhale files from the dataset_path
    logger.info(f"Reading raw data from '{args.dataset_path}'")
    exhale_volumes = find_volumes(args.dataset_path, EXHALE_SUFFIX)
    inhale_volumes = find_volumes(args.dataset_path, INHALE_SUFFIX)

    # log the number of exhale and inhale files
    logger.info(f"Found {len(exhale_volumes)} exhale volumes: ({[subject_of(path) for path in exhale_volumes]})")
    logger.info(f"Found {len(inhale_volumes)} inhale volumes: ({[subject_of(path) for path in inhale_volumes]})\n")

    # refuse to continue when a subject misses one of its two volumes,
    # otherwise volumes of different subjects would be paired silently
    try:
        volume_pairs = pair_by_subject(exhale_volumes, inhale_volumes)
    except ValueError as error:
        logger.error(str(error))
        sys.exit(1)

    # read the data dictionary
    json_path = description_path(args.dataset_path)
    with open(json_path, 'r', encoding='utf-8') as json_file:
        dictionary = json.load(json_file)
    split = split_name(args.dataset_path)
    split_information = dictionary.get(split, {})

    # the sitkPixelType value for RAW_DATA is the same for every subject
    sitk_pixel_type = DataTypes.RAW_DATA.value

    # iterate over all of the raw inhale and exhale volumes and export them as nifti files
    for exhale_volume, inhale_volume in volume_pairs:
        # get the subject name and information
        subject_name = subject_of(exhale_volume)
        subject_information = split_information.get(subject_name)
        if subject_information is None:
            logger.error(f"No entry for subject '{subject_name}' under '{split}' in '{json_path}'")
            sys.exit(1)

        # parse the data
        logger.info(f"Parsing exhale and inhale volume of subject: {subject_name}, dtype: {sitk_pixel_type}")
        exhale_raw = load_volume(exhale_volume, subject_information, sitk_pixel_type)
        inhale_raw = load_volume(inhale_volume, subject_information, sitk_pixel_type)

        # validate the image sizes explicitly (assert would be stripped under -O)
        if exhale_raw.GetSize() != inhale_raw.GetSize():
            raise ValueError("Exhale and inhale image sizes do not match")
        if exhale_raw.GetSize() != tuple(subject_information['image_dim']):
            raise ValueError("Image size does not match the size in the data dictionary")
        logger.info(f"Exhale image size: {exhale_raw.GetSize()}")
        logger.info(f"Inhale image size: {inhale_raw.GetSize()}")

        # saving the nifti files
        logger.info(f"Saving the nifti files for subject {subject_name}. \n")
        sitk.WriteImage(exhale_raw, nifti_path(exhale_volume))
        sitk.WriteImage(inhale_raw, nifti_path(inhale_volume))


if __name__ == "__main__":
    main()