forked from mlcommons/GaNDLF
-
Notifications
You must be signed in to change notification settings - Fork 1
/
gandlf_patchMiner
121 lines (99 loc) · 3.47 KB
/
gandlf_patchMiner
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
from GANDLF.utils import *
fix_paths(os.getcwd()) # add relevant vips path
import time
import warnings
import argparse
import pandas as pd
import openslide
import numpy as np
import yaml
from PIL import Image
from GANDLF.OPM.opm.patch_manager import *
from GANDLF.OPM.opm.utils import *
from functools import partial
from pathlib import Path
Image.MAX_IMAGE_PIXELS = None
warnings.simplefilter("ignore")
def generate_initial_mask(slide_path, scale):
    """
    Helper method to build a low-resolution tissue mask for a slide
    :param slide_path: Path to slide (str)
    :param scale: Integer downsampling factor; the requested thumbnail is
        (slide width // scale, slide height // scale)
    :return: tuple of (tissue_mask(thumbnail), real_scale), where real_scale is
        the (x, y) ratio between the full slide dimensions and the thumbnail
        that was actually produced
    """
    # Open the slide only for as long as its pixels are needed so the
    # underlying file handle is released even if thumbnail creation fails.
    with openslide.open_slide(slide_path) as slide:
        slide_dims = slide.dimensions
        # Call thumbnail for efficiency instead of reading the full-resolution image
        slide_thumbnail = np.asarray(
            slide.get_thumbnail((slide_dims[0] // scale, slide_dims[1] // scale))
        )

    # The thumbnail may not have exactly the requested size, so the effective
    # scale is recomputed from its real shape. The array is indexed
    # (rows, cols), hence shape[1] pairs with the slide width and shape[0]
    # with the slide height.
    real_scale = (
        slide_dims[0] / slide_thumbnail.shape[1],
        slide_dims[1] / slide_thumbnail.shape[0],
    )
    # NOTE(review): tissue_mask is not defined in this file; presumably it
    # comes from the OPM utils star-import -- confirm.
    return tissue_mask(slide_thumbnail), real_scale
def parse_gandlf_csv(fpath):
    """
    Lazily iterate over the unique rows of a CSV file
    :param fpath: Path to the CSV file; it is read with every column as str
    :return: generator yielding (SubjectID, Channel_0, Label) tuples, one per
        row left after exact duplicate rows are dropped
    """
    wanted_columns = ("SubjectID", "Channel_0", "Label")
    unique_rows = pd.read_csv(fpath, dtype=str).drop_duplicates()
    for _, record in unique_rows.iterrows():
        yield tuple(record[column] for column in wanted_columns)
if __name__ == "__main__":
    # Command-line entry point: for every row of the input CSV, build a
    # PatchManager for the slide, restrict it with a mask from
    # generate_initial_mask, register the patch criteria and mine patches
    # into the output directory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--input_CSV",
        dest="input_path",
        help="input path for the tissue",
        required=True,
    )
    parser.add_argument(
        "-o",
        "--output_path",
        dest="output_path",
        default=None,
        required=True,
        help="output path for the patches",
    )
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        dest="config",
        help="config.yml for running OPM. ",
        required=True,
    )
    args = parser.parse_args()

    # Read the config inside a context manager so the file handle is closed
    # as soon as parsing finishes.
    # NOTE(review): yaml.FullLoader accepts more than plain data tags; if the
    # config can come from an untrusted source, consider yaml.safe_load.
    with open(args.config) as config_file:
        cfg = yaml.load(config_file, Loader=yaml.FullLoader)

    if not os.path.exists(args.output_path):
        print("Output Directory does not exist, we are creating one for you.")
        Path(args.output_path).mkdir(parents=True, exist_ok=True)

    # Keep the trailing "/" on the output directory.
    # NOTE(review): presumably mine_patches builds paths by string
    # concatenation -- confirm before removing this.
    out_dir = os.path.abspath(args.output_path)
    if not out_dir.endswith("/"):
        out_dir += "/"
    out_csv_path = os.path.join(out_dir, "opm_train.csv")

    for sid, slide, label in parse_gandlf_csv(args.input_path):
        start = time.time()

        # Create new instance of slide manager
        manager = PatchManager(slide)
        manager.set_label_map(label)
        manager.set_subjectID(sid)

        # Generate an initial validity mask
        mask, scale = generate_initial_mask(slide, cfg["scale"])
        print("Setting valid mask...")
        manager.set_valid_mask(mask, scale)

        # Reject patch if any pixels are transparent
        manager.add_patch_criteria(alpha_channel_check)

        # Reject patch if image dimensions are not equal to the configured
        # patch size (cfg["patch_size"] is read as [height, width])
        patch_dims_check = partial(
            patch_size_check,
            patch_height=cfg["patch_size"][0],
            patch_width=cfg["patch_size"][1],
        )
        manager.add_patch_criteria(patch_dims_check)

        # Mine patches for this slide into out_dir, passing the shared CSV
        # path and the config through to the manager
        manager.mine_patches(out_dir, output_csv=out_csv_path, config=cfg)
        print("Total time: {}".format(time.time() - start))