-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
594 lines (435 loc) · 20 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
'''
The copyright in this software is being made available under this Software
Copyright License. This software may be subject to other third party and
contributor rights, including patent rights, and no such rights are
granted under this license.
Copyright (c) 1995 - 2020 Fraunhofer-Gesellschaft zur Förderung der
angewandten Forschung e.V. (Fraunhofer)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted for purpose of testing the functionalities of
this software provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, INCLUDING
WITHOUT LIMITATION THE PATENTS OF THE COPYRIGHT HOLDERS AND
CONTRIBUTORS, ARE GRANTED BY THIS SOFTWARE LICENSE. THE
COPYRIGHT HOLDERS AND CONTRIBUTORS PROVIDE NO WARRANTY OF PATENT
NON-INFRINGEMENT WITH RESPECT TO THIS SOFTWARE.
'''
from absl import app
from absl.flags import argparse_flags
import sys
import os
import argparse
import time
import json
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, spearmanr
import matplotlib.pyplot as plt
from model import DistortionSensitivityModel
from iqaDataFrame import IqaDataFrame
from iqaDataset import IqaDataset, RandomBlocks, ToTensor, FlipHorizontal
def squared_difference(inputs, targets):
    """
    Return the elementwise squared difference of two inputs.

    Parameters:
    -----------
    inputs : tensor (or any type supporting ``-`` and ``**``),
        First operand.
    targets : tensor (or any type supporting ``-`` and ``**``),
        Second operand; subtracted from ``inputs``.

    Returns:
    --------
    ``(inputs - targets) ** 2``, computed elementwise.
    """
    delta = inputs - targets
    return delta ** 2
def mse_to_paMse(mse, sensitivity, block_avg_dim):
    """
    Compute the image-wise paMSE from blockwise MSE values.

    Each blockwise MSE is scaled by ``10 ** (sensitivity / 10)`` (i.e. the
    sensitivity is interpreted on a dB-like scale) and the scaled values are
    averaged across ``block_avg_dim``.

    Parameters:
    -----------
    mse : tensor,
        Blockwise mse.
    sensitivity : tensor,
        Blockwise estimates of distortion sensitivity. Must have the same
        shape as mse.
    block_avg_dim : int or tuple of ints,
        Dimension(s) across which to average the blockwise adjusted (local)
        paMSEs.

    Returns:
    --------
    tensor,
        Mean of the sensitivity-weighted blockwise MSEs.
    """
    # convert the sensitivity into a multiplicative weight
    weight = 10 ** (sensitivity / 10.)
    weighted_mse = weight * mse
    return weighted_mse.mean(dim=block_avg_dim)
def psnr(mse, epsilon=None, peak=torch.tensor(65025.)):
    """
    Compute peak signal-to-noise ratio from mse.

    Parameters:
    -----------
    mse : tensor,
        Mean squared error value(s).
    epsilon : tensor or float, optional
        Stability term used as lower bound for the mse. Should be small
        (~0.001). If None, the mse is used unchanged (an mse of 0 then yields
        an infinite PSNR).
    peak : tensor or float,
        Peak value for PSNR, e.g. (2**8 - 1)**2 = 255**2 = 65025.

    Returns:
    --------
    tensor,
        ``10 * (log10(peak) - log10(mse))``, with the same shape as mse.
    """
    if epsilon is not None:
        if not isinstance(epsilon, torch.Tensor):
            # torch.max(tensor, other) needs a tensor operand; build it on the
            # mse's device/dtype so plain Python floats are accepted too
            epsilon = torch.tensor(
                epsilon, dtype=mse.dtype, device=mse.device)
        mse = torch.max(mse, epsilon)
    if not isinstance(peak, torch.Tensor):
        # torch.log10 rejects Python scalars; mirror the default's
        # 0-dim float tensor so the result is identical to passing a tensor
        peak = torch.tensor(float(peak))
    return 10 * (torch.log10(peak) - torch.log10(mse))
def predict(args, model=None, dataloader=None, loss_function=None,
            device=None, return_dataFrame=True, save_dataFrame=True,
            save_sensitivity_maps=True, modus=""):
    """
    Predict image quality. This method can be used with a trained model and is
    also used to make validation set predictions during training. The keyword
    arguments are used during training but do not have to be altered when
    making predictions using a trained model.

    Parameters:
    -----------
    args : arguments Namespace as returned by argument parser,
        Arguments used for predictions, see parse_args(). If any other
        argument is specified, the other argument will be used and the value
        in the args namespace is ignored.
    model : pytorch model,
        During training the model will be passed via this argument. If None,
        the model is loaded from args.load_model_path.
    dataloader : pytorch dataloader,
        During training the validation set's dataloader will be passed via
        this argument. If None, a dataloader over the test split is built
        from the dataset arguments in args.
    loss_function : pytorch loss function,
        During training the loss function will be passed via this argument.
        Defaults to torch.nn.L1Loss().
    device : str,
        Device to use (e.g. 'cuda:0'). If None, 'cuda:0' is used when
        available, otherwise 'cpu'.
    return_dataFrame : bool,
        If True, a pandas.DataFrame will be returned with the model
        predictions. Default is true, but will be set to False during training.
    save_dataFrame : bool,
        If True, the predictions will be saved under args.save_model_path as a
        'predictions.csv'.
    save_sensitivity_maps : bool,
        If True and args.create_sensitivity_maps == True, the sensitivity maps
        corresponding to the model predictions will be saved as .png and .npy
        files under args.save_model_path. This argument is only useful during
        training to prevent sensitivity maps from being saved in every
        iteration over the validation set. (I.e. during training, this
        argument will be set to 'False' except for the final model
        predictions.)
    modus : str,
        Will be used as part of the path under which the sensitivity maps are
        saved, e.g. pass 'training' to save sensitivity maps under
        os.path.join(args.save_model_path, "sens_maps", "training", *.png)

    Returns:
    --------
    if return_dataFrame == True:
        pandas.DataFrame holding model predictions
    if return_dataFrame == False:
        avg_loss : 0-dim tensor
            average prediction loss (L1 with the default loss function)
        pcc : float
            pearson correlation between predictions and true quality targets
        srocc : float
            spearman correlation between predictions and true quality targets

    Raises:
    -------
    ValueError
        If return_dataFrame is False and the dataloader yields no samples.
    """
    def image_to_blocks(image, block_size):
        """
        Extract as many non-overlapping blocks as possible from a given image,
        starting at the top left corner. The remaining pixels that do not make
        up further blocks are discarded.

        Parameters:
        -----------
        image : 4d-tensor
            Image tensor of dimensions (batch_size, channels, height, width).
            Batch_size and channels are supposed to be 1, i.e.
            batch_size = channels = 1.
        block_size : int
            Integer specifying height and width of blocks to be extracted.

        Returns:
        --------
        blocks : 4d-tensor
            Tensor of dimensions (blocks, 1, block_size, block_size) that
            contains all extracted blocks.
        """
        _, _, h, w = image.size()
        # number of blocks that fit into image height
        numH = h // block_size
        # number of blocks that fit into image width
        numW = w // block_size
        # shrink image to multiple of block_size
        image = image[:, :, :numH * block_size, :numW * block_size]
        # after this shape will be (batch, height, width, channels)
        image = image.transpose(1, 2).transpose(2, 3)
        # reshape image to blocks
        blocks = image.reshape(
            [-1, numH, block_size, numW, block_size, 1]).transpose(
                2, 3).reshape(-1, block_size, block_size, 1)
        # after this shape will be (blocks, channels, height, width)
        return blocks.transpose(2, 3).transpose(1, 2)

    # Resolve the device first so that a stored model can be mapped onto it
    # while loading (a checkpoint saved on a GPU would otherwise fail to load
    # on a CPU-only machine).
    if device is None:
        # is a gpu available?
        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda:0" if use_cuda else "cpu")

    if model is None:
        # NOTE: torch.load unpickles the file and can therefore execute
        # arbitrary code -- only load model files from trusted sources.
        # NOTE(review): recent PyTorch releases default to weights_only=True,
        # which rejects fully pickled models -- confirm against the PyTorch
        # version in use.
        model = torch.load(args.load_model_path, map_location=device)
        model.optimize_gamma = args.optimize_gamma
    # no-op if the model already lives on `device`
    model.to(device)

    if loss_function is None:
        loss_function = torch.nn.L1Loss()

    if dataloader is None:
        # prepare dataset
        iqaDataFrame = IqaDataFrame(data_dir=args.data_dir,
            datasets=args.dataset, distortions=args.distortion,
            nTrain=args.nTrain, nVal=args.nVal, testIdx=args.testIdx,
            random_seed=args.random_seed, ref_image=args.ref_image,
            dist_image=args.dist_image)
        dataset = IqaDataset(iqaDataFrame.df_test, transform=ToTensor())
        dataloader = DataLoader(dataset, batch_size=1, num_workers=8)

    avg_loss = 0
    n_batches = 0
    preds = []
    paPsnrs = []
    qs = []

    collect_results = return_dataFrame or save_dataFrame
    if collect_results:
        results = pd.DataFrame(columns=["dataset", "selected_distortion",
            "refname", "distortion", "height", "width", "fps", "block_size",
            "random_seed", "optimize_gamma", "estimate_on_distorted",
            "quality", "q_norm", "mse", "psnr", "papsnr", "preds", "gamma",
            "steepness"])
        gammas = model.get_gamma_dict()

    # stability term for the blockwise mse
    epsilon = torch.tensor(0.001).to(device)

    model.eval()
    # Pure inference: every value read below is detached anyway, so disabling
    # autograd only saves memory and time.
    with torch.no_grad():
        for i_batch, sample_batched in enumerate(dataloader):
            n_batches += 1
            # send data to gpu, if available
            img_ref = sample_batched["img_ref"].to(device)
            img_dist = sample_batched["img_dist"].to(device)
            q_norm = sample_batched["q_norm"].to(device)

            if args.estimate_on_distorted:
                # switch input
                img_ref, img_dist = img_dist, img_ref

            # validation data contains full images, not blocks
            blocks_ref = image_to_blocks(img_ref, args.block_size)
            blocks_dist = image_to_blocks(img_dist, args.block_size)
            block_mse = squared_difference(
                blocks_ref, blocks_dist).mean(axis=(2, 3))

            # apply model, shape: b, c, 1, 1
            nn_input = (blocks_ref, sample_batched["distortion"][0])
            sensitivity, steepness = model(nn_input)

            if args.create_sensitivity_maps and save_sensitivity_maps:
                # plain ints keep the reshape independent of tensor
                # floor-division semantics (batch size is 1 here)
                map_rows = int(sample_batched["height"]) // args.block_size
                map_cols = int(sample_batched["width"]) // args.block_size
                create_sensitivity_maps(
                    sensitivity.detach().cpu().numpy().reshape(
                        map_rows, map_cols),
                    title=sample_batched["refname"][0].split("/")[-1],
                    save_dir=os.path.join(
                        args.save_model_path, "sens_maps", modus))

            # shape: (1,)
            paMSE_img = mse_to_paMse(
                block_mse, sensitivity, block_avg_dim=(0, 1))
            # shape: (1,)
            paPSNR_img = psnr(paMSE_img, epsilon=epsilon)
            # prediction: (1,)
            prediction = torch.sigmoid(steepness * paPSNR_img)
            loss = loss_function(prediction.squeeze(), q_norm.squeeze())

            # track loss and predictions
            avg_loss += loss.detach().cpu().squeeze()
            paPsnrs.extend(
                np.atleast_1d(paPSNR_img.detach().cpu().squeeze()))
            preds.extend(np.atleast_1d(prediction.detach().cpu().squeeze()))
            qs.extend(np.atleast_1d(q_norm.detach().cpu().squeeze()))

            if collect_results:
                results.loc[i_batch, "dataset"] = sample_batched["dataset"][0]
                results.loc[i_batch, "selected_distortion"] = \
                    args.distortion[0]
                results.loc[i_batch, "refname"] = sample_batched["refname"][0]
                results.loc[i_batch, "distortion"] = \
                    sample_batched["distortion"][0]
                results.loc[i_batch, "height"] = \
                    sample_batched["height"].numpy().squeeze()
                results.loc[i_batch, "width"] = \
                    sample_batched["width"].numpy().squeeze()
                results.loc[i_batch, "fps"] = \
                    sample_batched["fps"].numpy().squeeze()
                results.loc[i_batch, "block_size"] = args.block_size
                results.loc[i_batch, "random_seed"] = args.random_seed
                results.loc[i_batch, "optimize_gamma"] = args.optimize_gamma
                results.loc[i_batch, "estimate_on_distorted"] = \
                    args.estimate_on_distorted
                results.loc[i_batch, "quality"] = \
                    sample_batched["quality"].numpy().squeeze()
                results.loc[i_batch, "q_norm"] = \
                    sample_batched["q_norm"].detach().cpu().squeeze().numpy()
                results.loc[i_batch, "mse"] = \
                    sample_batched["mse"].squeeze().numpy()
                results.loc[i_batch, "psnr"] = \
                    psnr(sample_batched["mse"]).squeeze().numpy()
                results.loc[i_batch, "papsnr"] = \
                    paPSNR_img.detach().cpu().squeeze().numpy()
                results.loc[i_batch, "preds"] = \
                    prediction.detach().cpu().squeeze().numpy()

                # If the model was not trained on the distortion type that it
                # is tested on, the gamma will be set to np.nan. Else the
                # optimized gamma will be returned.
                _d = sample_batched["distortion"][0]
                if _d in gammas:
                    results.loc[i_batch, "gamma"] = \
                        gammas[_d].detach().cpu().numpy()[0]
                else:
                    results.loc[i_batch, "gamma"] = np.nan
                results.loc[i_batch, "steepness"] = \
                    steepness.detach().cpu().numpy().squeeze()

    if save_dataFrame:
        # exist_ok avoids the check-then-create race of os.path.exists()
        os.makedirs(args.save_model_path, exist_ok=True)
        results.to_csv(os.path.join(args.save_model_path, "predictions.csv"))

    if return_dataFrame:
        return results

    if n_batches == 0:
        # previously surfaced as an obscure UnboundLocalError on `i_batch`
        raise ValueError("The dataloader did not yield any samples.")
    avg_loss = avg_loss / n_batches
    pcc = pearsonr(preds, qs)[0]
    srocc = spearmanr(preds, qs)[0]
    return avg_loss, pcc, srocc
def create_sensitivity_maps(sensitivity, title, save_dir):
    """
    Plot and save a sensitivity map. Will also save the sensitivity map as a
    numpy array (.npy file).

    Parameters
    ----------
    sensitivity : 2d-array
        Sensitivity map to be plotted.
    title : str
        Title and filename (without extension) for the plot.
    save_dir : str
        Path to directory under which to save the plot. The directory is
        created if it does not exist.
    """
    # exist_ok avoids the check-then-create race of os.path.exists()
    os.makedirs(save_dir, exist_ok=True)

    try:
        plt.imshow(sensitivity)
        plt.colorbar()
        plt.title(title)
        plt.tight_layout()
        plt.savefig(os.path.join(save_dir, title + ".png"))
    finally:
        # always release the current figure, so a failed save does not leak
        # its content into the plot of the next call
        plt.close()

    np.save(file=os.path.join(save_dir, title + ".npy"), arr=sensitivity)
def parse_args(argv):
    """
    Parse the command line.

    Generic options are defined on the top-level parser; the sub-commands
    'train' and 'predict' add their own options. If no sub-command is given,
    the usage string is printed and the process exits with status 2.

    Parameters:
    -----------
    argv : list of str,
        Full argument vector including the program name at index 0 (it is
        skipped before parsing).

    Returns:
    --------
    args : argparse.Namespace
        Parsed arguments; ``args.command`` is either 'train' or 'predict'.
    """
    def str2bool(v):
        """Convert a yes/no, true/false or 1/0 string to a bool."""
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', '1'):
            return True
        elif v.lower() in ('no', 'false', '0'):
            return False
        else:
            raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse_flags.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    ### Generic arguments ###
    parser.add_argument("--data_dir",
        type=str, default="../datasets")
    parser.add_argument("--block_size",
        type=int, default=32,
        help="Block size for which to estimate distortion sensitivity. "
             "Requires a single int which specifies both width and height.")
    parser.add_argument("--random_seed",
        type=int, default=None,
        help="Random seed used to make runs reproducible.")
    parser.add_argument("--load_model_path",
        type=str, default=None,
        help="You may specify the path to an existing model to continue "
             "training or to use this model for predictions.")
    parser.add_argument("--dataset",
        type=str, nargs="+",
        help="Names of datasets to be used.")
    parser.add_argument("--distortion",
        type=str, nargs="+", required=True,
        help="Distortion type(s) to use.")
    parser.add_argument("--nTrain",
        type=int, default=17,
        help="Number of reference images in the training set.")
    parser.add_argument("--nVal",
        type=int, default=6,
        help="Number of reference images in the validation set.")
    parser.add_argument("--testIdx",
        type=int, default=None,
        help="You can select a specific reference image which (together with "
             "the corresponding distorted images) will constitute the test "
             "set. The index refers to the n-th reference image, in "
             "alphabetical order.")
    parser.add_argument("--estimate_on_distorted",
        default=False, type=str2bool,
        help="Switch the input to the neural network: If this flag is used, "
             "the distorted image is fed into the neural network instead of "
             "the reference image.")
    parser.add_argument("--save_model_path",
        type=str, default="./saved_models",
        help="Location under which to store the trained model and results.")
    parser.add_argument("--create_sensitivity_maps",
        default=False, type=str2bool,
        help="Create sensitivity maps.")
    parser.add_argument("--optimize_gamma",
        default=False, type=str2bool,
        help="If true, the model will optimize distortion-type specific "
             "factors that scale the distortion sensitivity estimated by the "
             "neural network. A model trained with this mechanism enabled can "
             "only be evaluated on the same distortion types that were used "
             "during training. (We used this to produce the gamma-star "
             "results in the paper).")

    subparsers = parser.add_subparsers(title="command",
        dest="command",
        help="Choose 'train' to optimize a new model or 'predict' to use an "
             "existing model to make predictions.")

    ### train arguments ###
    train_cmd = subparsers.add_parser("train",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Train a new model.")
    train_cmd.add_argument("--batch_size",
        type=int, default=4,
        help="(Maximum) Number of reference images per training-batch. The "
             "actual batch-size, i.e., the number of data samples per batch, "
             "is determined by this argument in conjunction with "
             "--num_blocks_per_image as "
             "(--batch_size * --num_blocks_per_image)")
    train_cmd.add_argument("--num_blocks_per_image",
        type=int, default=8,
        help="Number of blocks to sample from a single image.")
    train_cmd.add_argument("--n_epochs",
        type=int, default=300,
        help="Number of training epochs.")

    ### predict arguments ###
    predict_cmd = subparsers.add_parser("predict",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Load a trained model and compute prediction on new data.")
    predict_cmd.add_argument("--ref_image",
        type=str, default=None,
        help="Path to a reference image. This argument will override the "
             "generic dataset argument and is useful if you want to deploy "
             "the model on single images (or, more generally, on data that "
             "has not been formatted according to the 'iqaDataFrame' class). "
             "If you use this argument, you will also need to supply a path "
             "for a distorted image via '--dist_image'.")
    predict_cmd.add_argument("--dist_image",
        type=str, default=None,
        help="Path to distorted image. See help for '--ref_image'.")

    # Parse arguments (argv[0] is the program name).
    args = parser.parse_args(argv[1:])
    if args.command is None:
        # should be 'train' or 'predict'
        parser.print_usage()
        sys.exit(2)
    return args
def main(args):
    """
    Entry point: optionally seed torch, then run the selected sub-command.

    Parameters:
    -----------
    args : arguments Namespace as returned by parse_args(),
        ``args.random_seed`` (int or None) and ``args.command`` ('train' or
        'predict') are read here; the namespace is forwarded unchanged to the
        selected sub-command.
    """
    seed = args.random_seed
    if seed is not None:
        torch.manual_seed(seed)
        # trade cudnn speed for reproducible results
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Invoke subcommand.
    command = args.command
    if command == "train":
        # NOTE(review): `train` is neither defined nor imported in the
        # visible part of this file -- confirm it exists before relying on
        # the 'train' command.
        train(args)
    elif command == "predict":
        predict(args)
# Script entry point: absl parses the command line with parse_args() and
# passes the resulting namespace to main().
if __name__ == "__main__":
    app.run(main, flags_parser=parse_args)