-
Notifications
You must be signed in to change notification settings - Fork 2
/
inference.py
179 lines (157 loc) · 6.7 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import argparse
import os
import shutil
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import datasets, transforms
from tqdm import tqdm
import models
class SegmentationFolder(datasets.ImageFolder):
def __getitem__(self, index):
path = self.imgs[index][0]
sample = self.loader(path)
height, width = sample.size[1], sample.size[0]
if self.transform is not None:
sample = self.transform(sample)
return sample, path, height, width
def parse_args():
parser = argparse.ArgumentParser(description='Inference')
parser.add_argument('--nb_classes', type=int, default=50)
parser.add_argument('--mode',
type=str,
required=True,
help='validation or test',
choices=['validation', 'test'])
parser.add_argument('--output_dir',
type=str,
default=None,
help='the path to save segmentation masks')
parser.add_argument('--data_path',
type=str,
default=None,
help='path to imagenetS dataset')
parser.add_argument('--finetune',
type=str,
default=None,
help='the model checkpoint file')
parser.add_argument('--pretrained_rfnext',
default='',
help='pretrained weights for RF-Next')
parser.add_argument('--model',
default='vit_small_patch16',
help='model architecture')
parser.add_argument('--patch_size',
type=int,
default=4,
help='For convnext/rfconvnext, the numnber of output channels is '
'nb_classes * patch_size * patch_size.'
'https://arxiv.org/pdf/2111.06377.pdf')
parser.add_argument(
'--max_res',
default=1000,
type=int,
help='Maximum resolution for evaluation. 0 for disable.')
parser.add_argument('--method',
default='example submission',
help='Method name in method description file(.txt).')
parser.add_argument('--train_data',
default='null',
help='Training data in method description file(.txt).')
parser.add_argument(
'--train_scheme',
default='null',
help='Training scheme in method description file(.txt), \
e.g., SSL, Sup, SSL+Sup.')
parser.add_argument(
'--link',
default='null',
help='Paper/project link in method description file(.txt).')
parser.add_argument(
'--description',
default='null',
help='Method description in method description file(.txt).')
args = parser.parse_args()
return args
def main():
args = parse_args()
# build model
model = models.__dict__[args.model](args)
model = model.cuda()
model.eval()
# load checkpoints
checkpoint = torch.load(args.finetune)['model']
model.load_state_dict(checkpoint, strict=True)
# build the dataloader
dataset_path = os.path.join(args.data_path, args.mode)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
dataset = SegmentationFolder(root=dataset_path,
transform=transforms.Compose([
transforms.Resize(256),
transforms.ToTensor(),
normalize,
]))
dataloader = torch.utils.data.DataLoader(dataset,
batch_size=1,
num_workers=16,
pin_memory=True)
output_dir = os.path.join(args.output_dir, args.mode)
for images, path, height, width in tqdm(dataloader):
path = path[0]
cate = path.split('/')[-2]
name = path.split('/')[-1].split('.')[0]
if not os.path.exists(os.path.join(output_dir, cate)):
os.makedirs(os.path.join(output_dir, cate))
with torch.no_grad():
H = height.item()
W = width.item()
output = model.forward(images.cuda())
if (H > W and H * W > args.max_res * args.max_res
and args.max_res > 0):
output = F.interpolate(
output, (args.max_res, int(args.max_res * W / H)),
mode='bilinear',
align_corners=False)
output = torch.argmax(output, dim=1, keepdim=True)
output = F.interpolate(output.float(), (H, W),
mode='nearest').long()
elif (H <= W and H * W > args.max_res * args.max_res
and args.max_res > 0):
output = F.interpolate(
output, (int(args.max_res * H / W), args.max_res),
mode='bilinear',
align_corners=False)
output = torch.argmax(output, dim=1, keepdim=True)
output = F.interpolate(output.float(), (H, W),
mode='nearest').long()
else:
output = F.interpolate(output, (H, W),
mode='bilinear',
align_corners=False)
output = torch.argmax(output, dim=1, keepdim=True)
output = output.squeeze()
res = torch.zeros(size=(output.shape[0], output.shape[1], 3))
res[:, :, 0] = output % 256
res[:, :, 1] = output // 256
res = res.cpu().numpy()
res = Image.fromarray(res.astype(np.uint8))
res.save(os.path.join(output_dir, cate, name + '.png'))
if args.mode == 'test':
method = 'Method name: {}\n'.format(
args.method) + \
'Training data: {}\nTraining scheme: {}\n'.format(
args.train_data, args.train_scheme) + \
'Networks: {}\nPaper/Project link: {}\n'.format(
args.model, args.link) + \
'Method description: {}'.format(
args.description)
with open(os.path.join(output_dir, 'method.txt'), 'w') as f:
f.write(method)
# zip for submission
shutil.make_archive(os.path.join(args.output_dir, args.mode),
'zip',
root_dir=output_dir)
if __name__ == '__main__':
main()