forked from foamliu/Deep-Image-Matting-PyTorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
160 lines (126 loc) · 5.38 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import argparse
import logging
import os
import cv2 as cv
import numpy as np
import torch
from config import im_size, epsilon, epsilon_sqr
def clip_gradient(optimizer, grad_clip):
    """
    Clamp every available gradient element-wise into [-grad_clip, grad_clip].

    The clamp is applied in place, so nothing is returned. Parameters whose
    ``grad`` is ``None`` are skipped.

    :param optimizer: optimizer whose ``param_groups`` hold the parameters to clip
    :param grad_clip: clip value (bound applied symmetrically around zero)
    """
    all_params = (
        p
        for group in optimizer.param_groups
        for p in group['params']
    )
    for p in all_params:
        grad = p.grad
        if grad is None:
            continue
        grad.data.clamp_(-grad_clip, grad_clip)
def save_checkpoint(epoch, epochs_since_improvement, model, optimizer, loss, is_best):
    """
    Serialise the training state to ``checkpoint.tar`` in the working directory.

    The model and optimizer objects themselves are stored in the state dict.
    When ``is_best`` is true the same state is also written to
    ``BEST_checkpoint.tar`` so that a later, worse checkpoint cannot
    overwrite it.

    :param epoch: epoch number stored under ``'epoch'``
    :param epochs_since_improvement: counter stored under ``'epochs_since_improvement'``
    :param model: model object stored under ``'model'``
    :param optimizer: optimizer object stored under ``'optimizer'``
    :param loss: loss value stored under ``'loss'``
    :param is_best: whether to also write the "best" copy
    """
    state = dict(
        epoch=epoch,
        epochs_since_improvement=epochs_since_improvement,
        loss=loss,
        model=model,
        optimizer=optimizer,
    )

    destinations = ['checkpoint.tar']
    if is_best:
        destinations.append('BEST_checkpoint.tar')

    for path in destinations:
        torch.save(state, path)
class AverageMeter(object):
    """
    Running statistics for a metric: latest value, weighted sum, sample
    count and the resulting average.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero all tracked statistics."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """
        Record a new observation.

        :param val: the observed value (kept as the most recent value)
        :param n: weight of the observation, i.e. how many samples ``val`` stands for
        """
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
def adjust_learning_rate(optimizer, shrink_factor):
    """
    Multiply the learning rate of every parameter group by ``shrink_factor``.

    A notice is printed before the change and the first group's new rate is
    printed afterwards.

    :param optimizer: optimizer whose learning rate must be shrunk.
    :param shrink_factor: factor in interval (0, 1) to multiply learning rate with.
    """
    print("\nDECAYING learning rate.")
    groups = optimizer.param_groups
    for group in groups:
        current_lr = group['lr']
        group['lr'] = current_lr * shrink_factor
    first_lr = groups[0]['lr']
    print("The new learning rate is %f\n" % (first_lr,))
def get_learning_rate(optimizer):
    """
    Return the learning rate of the optimizer's first parameter group.

    :param optimizer: optimizer exposing ``param_groups``
    :return: the ``'lr'`` entry of ``param_groups[0]``
    """
    first_group = optimizer.param_groups[0]
    return first_group['lr']
def accuracy(scores, targets, k=1):
    """
    Compute top-k accuracy as a percentage.

    :param scores: 2-D tensor of scores; the top-k is taken along dimension 1
    :param targets: tensor of target indices, one per row of ``scores``
    :param k: how many of the highest-scoring indices count as a hit
    :return: percentage (0-100, Python float) of rows whose target is among the top k
    """
    batch_size = targets.size(0)
    _, top_indices = torch.topk(scores, k, dim=1, largest=True, sorted=True)
    # Repeat each target across the k columns so it can be compared element-wise.
    expanded_targets = targets.view(-1, 1).expand_as(top_indices)
    hits = top_indices.eq(expanded_targets)
    hit_count = hits.view(-1).float().sum().item()
    return hit_count * (100.0 / batch_size)
def _str2bool(value):
    """Convert a command-line string such as 'true' / 'False' / '0' to a bool."""
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string is kept as False: with the former ``type=bool`` it was
    # the only spelling that disabled the flag.
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got %r' % (value,))


def parse_args(argv=None):
    """
    Parse the training command-line options.

    ``--pretrained`` is parsed with an explicit string-to-bool converter.
    With ``type=bool`` argparse calls ``bool("False")``, which is ``True``,
    so the option could not be turned off by name.

    :param argv: optional list of argument strings; when ``None`` (the default)
                 the arguments are taken from ``sys.argv`` as before.
    :return: the populated ``argparse.Namespace``
    """
    parser = argparse.ArgumentParser(description='Train face network')
    # general
    parser.add_argument('--end-epoch', type=int, default=1000, help='training epoch size.')
    parser.add_argument('--lr', type=float, default=0.01, help='start learning rate')
    parser.add_argument('--lr-step', type=int, default=10, help='period of learning rate decay')
    parser.add_argument('--optimizer', default='sgd', help='optimizer')
    parser.add_argument('--weight-decay', type=float, default=0.0, help='weight decay')
    parser.add_argument('--mom', type=float, default=0.9, help='momentum')
    parser.add_argument('--batch-size', type=int, default=32, help='batch size in each context')
    parser.add_argument('--checkpoint', type=str, default=None, help='checkpoint')
    parser.add_argument('--pretrained', type=_str2bool, default=True, help='pretrained model')
    return parser.parse_args(argv)
def get_logger():
    """
    Return the root logger, configured to emit DEBUG and above to a stream handler.

    The function is idempotent: the stream handler is tagged with a fixed
    name and attached only if no handler with that name is already present.
    Without this guard every call would attach another handler and each log
    record would be printed once per call made so far.

    :return: the root ``logging.Logger``
    """
    handler_name = 'utils.get_logger.stream'
    logger = logging.getLogger()

    already_attached = any(
        getattr(existing, 'name', None) == handler_name
        for existing in logger.handlers
    )
    if not already_attached:
        handler = logging.StreamHandler()
        handler.set_name(handler_name)
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s \t%(message)s"))
        logger.addHandler(handler)

    logger.setLevel(logging.DEBUG)
    return logger
def safe_crop(mat, x, y, crop_size=(im_size, im_size)):
    """
    Cut a ``crop_size`` window out of ``mat`` starting at column ``x``, row ``y``.

    The window is pasted into the top-left corner of a zero-filled uint8
    canvas, so any part of the window that falls outside ``mat`` stays zero.
    If ``crop_size`` differs from ``(im_size, im_size)`` the canvas is resized
    to ``im_size`` x ``im_size`` with nearest-neighbour interpolation.

    :param mat: 2-D array, or an array with a trailing channel axis of size 3
    :param x: left column of the window
    :param y: top row of the window
    :param crop_size: ``(height, width)`` of the window
    :return: uint8 array holding the (zero-padded, possibly resized) crop
    """
    crop_height, crop_width = crop_size

    is_single_channel = len(mat.shape) == 2
    if is_single_channel:
        canvas_shape = (crop_height, crop_width)
    else:
        canvas_shape = (crop_height, crop_width, 3)
    canvas = np.zeros(canvas_shape, np.uint8)

    # Slicing clips at the array border, so the patch may be smaller than the window.
    patch = mat[y:y + crop_height, x:x + crop_width]
    patch_h, patch_w = patch.shape[:2]
    canvas[:patch_h, :patch_w] = patch

    if crop_size == (im_size, im_size):
        return canvas
    return cv.resize(canvas, dsize=(im_size, im_size), interpolation=cv.INTER_NEAREST)
# alpha prediction loss: the absolute difference between the ground truth alpha values and the
# predicted alpha values at each pixel. However, because the absolute value is not differentiable
# at zero, we use the following loss function to approximate it.
def alpha_prediction_loss(y_pred, y_true):
    """
    Smoothed absolute-error loss between predicted and target alpha values.

    Index 0 along dimension 1 of ``y_true`` is compared against index 0 of
    ``y_pred``; index 1 of ``y_true`` is used as a multiplicative mask on the
    difference. The per-element term is ``sqrt(diff^2 + epsilon_sqr)`` and the
    total is divided by ``sum(mask) + epsilon``.

    :param y_pred: prediction tensor; index 0 along dimension 1 is read
    :param y_true: target tensor; indices 0 (target) and 1 (mask) along dimension 1 are read
    :return: scalar loss tensor
    """
    target_alpha = y_true[:, 0, :]
    mask = y_true[:, 1, :]
    predicted_alpha = y_pred[:, 0, :]

    masked_diff = (predicted_alpha - target_alpha) * mask
    smooth_abs = torch.sqrt(masked_diff.pow(2) + epsilon_sqr)
    normaliser = mask.sum() + epsilon
    return smooth_abs.sum() / normaliser
# compute the MSE error given a prediction, a ground truth and a trimap.
# pred: the predicted alpha matte
# alpha: the ground truth alpha matte
# trimap: the given trimap
#
def compute_mse(pred, alpha, trimap):
    """
    Sum of squared differences between ``pred`` and ``alpha``, divided by the
    number of trimap entries equal to 128.

    The squared error is summed over every element, not only those where the
    trimap is 128. There is no guard for a trimap that contains no 128 entries.

    :param pred: the predicted alpha matte
    :param alpha: the ground truth alpha matte
    :param trimap: the given trimap
    :return: the normalised squared error
    """
    unknown_mask = trimap == 128
    unknown_count = float(unknown_mask.sum())
    residual = pred - alpha
    squared_error_total = (residual ** 2).sum()
    return squared_error_total / unknown_count
# compute the SAD error given a prediction and a ground truth.
#
def compute_sad(pred, alpha):
    """
    Sum of absolute differences between ``pred`` and ``alpha``, divided by 1000.

    :param pred: the predicted alpha matte
    :param alpha: the ground truth alpha matte
    :return: the scaled sum of absolute differences
    """
    absolute_error = np.abs(pred - alpha)
    return absolute_error.sum() / 1000
def draw_str(dst, target, s):
    """
    Draw the text ``s`` on image ``dst`` in white with a black drop shadow.

    The shadow is drawn first, offset by one pixel right and down with a
    thicker stroke; the white text is then drawn on top at ``target``.

    :param dst: image to draw on (modified in place by ``cv.putText``)
    :param target: ``(x, y)`` position passed to ``cv.putText`` as the text origin
    :param s: the string to draw
    """
    x, y = target
    font = cv.FONT_HERSHEY_PLAIN
    shadow_origin = (x + 1, y + 1)
    black = (0, 0, 0)
    white = (255, 255, 255)
    cv.putText(dst, s, shadow_origin, font, 1.0, black, thickness=2, lineType=cv.LINE_AA)
    cv.putText(dst, s, (x, y), font, 1.0, white, lineType=cv.LINE_AA)
def ensure_folder(folder):
    """
    Create ``folder`` (including missing parents) if it does not exist yet.

    ``exist_ok=True`` replaces the former "check, then create" sequence, which
    could raise ``FileExistsError`` when another process or worker created the
    directory between the check and the ``makedirs`` call.

    :param folder: path of the directory to create
    :raises FileExistsError: if ``folder`` exists but is not a directory
    """
    os.makedirs(folder, exist_ok=True)