import torch
print(torch.__version__)
import torchvision
print(torchvision.__version__)
import numpy
import cv2

# generate new images by interpolating between two latent representations

# defining the network, an autoencoder similar to the one from the slides
class Network(torch.nn.Module):
    def __init__(self):
        super(Network, self).__init__()

        self.moduleEncoder = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=5),
            torch.nn.ReLU(),
            torch.nn.Conv2d(32, 64, kernel_size=5),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 128, kernel_size=4, stride=2),
            torch.nn.ReLU(),
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=2),
            torch.nn.ReLU(),
            torch.nn.Conv2d(256, 64, kernel_size=4)
        )

        self.moduleDecoder = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(64, 256, kernel_size=4),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(64, 32, kernel_size=5),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(32, 1, kernel_size=5)
        )
    # end

    def forward(self, x):
        x = self.moduleEncoder(x)
        x = self.moduleDecoder(x)
        return x
    # end
# end

moduleNetwork = Network()

# loading the provided weights, this exercise is not about training the network
moduleNetwork.load_state_dict(torch.load('./17-autoencoder.pytorch'))

# setting the network to the evaluation mode, this makes no difference here though since the network contains no layers like dropout or batchnorm that behave differently during evaluation
moduleNetwork.eval()
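
# a quick sanity check, not part of the original exercise: for a 1x1x28x28
# input the encoder produces a 1x64x1x1 latent code, so each image is
# compressed into 64 latent values that the decoder maps back to 1x1x28x28
print(moduleNetwork.moduleEncoder(torch.zeros(1, 1, 28, 28)).size())  # torch.Size([1, 64, 1, 1])
print(moduleNetwork(torch.zeros(1, 1, 28, 28)).size())  # torch.Size([1, 1, 28, 28])
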
# loading two samples and converting them to tensors, each of size 1x1x28x28
tensorFirst = torch.FloatTensor(cv2.imread(filename='./samples/fashion-1.png', flags=cv2.IMREAD_GRAYSCALE).astype(numpy.float32) / 255.0).unsqueeze(0).unsqueeze(0)
tensorSecond = torch.FloatTensor(cv2.imread(filename='./samples/fashion-2.png', flags=cv2.IMREAD_GRAYSCALE).astype(numpy.float32) / 255.0).unsqueeze(0).unsqueeze(0)
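
# note, not part of the original exercise: cv2.imread returns None when a
# file cannot be read, so the astype calls above would raise an AttributeError
# if the sample paths are wrong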

# encode the two samples to retrieve their representation in the latent space
# generate new samples by interpolating between the two latent representations
# use the formula from the slides with alpha = [ x * 0.1 for x in range(11) ]
# append each interpolated result as a tensor of size 28x28 to tensorOutputs
tensorOutputs = []
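
# a minimal sketch of one possible solution, assuming that the formula from
# the slides is the usual linear interpolation z = (1.0 - alpha) * z_first + alpha * z_second;
# torch.no_grad() assumes PyTorch >= 0.4 and merely avoids tracking gradients
with torch.no_grad():
    tensorLatentFirst = moduleNetwork.moduleEncoder(tensorFirst)
    tensorLatentSecond = moduleNetwork.moduleEncoder(tensorSecond)

    for dblAlpha in [ x * 0.1 for x in range(11) ]:
        tensorLatent = ((1.0 - dblAlpha) * tensorLatentFirst) + (dblAlpha * tensorLatentSecond)
        tensorOutputs.append(moduleNetwork.moduleDecoder(tensorLatent)[0, 0, :, :])
    # end
# end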

# making sure that tensorOutputs has the correct size and content using asserts
# afterwards combining all the samples into a single image and saving it to disk
assert(len(tensorOutputs) == 11)

for tensorOutput in tensorOutputs:
    assert(tensorOutput.type() == 'torch.FloatTensor')  # comparing the type string instead of type(tensorOutput) == torch.FloatTensor, which stopped matching in PyTorch >= 0.4
    assert(tensorOutput.size(0) == 28)
    assert(tensorOutput.size(1) == 28)
# end

tensorOutputs = [ tensorFirst[0, 0] ] + tensorOutputs + [ tensorSecond[0, 0] ]
numpyOutput = (numpy.concatenate([ tensorOutput.numpy() for tensorOutput in tensorOutputs ], 1).clip(0.0, 1.0) * 255.0).astype(numpy.uint8)
numpyOutput = cv2.resize(src=numpyOutput, dsize=None, fx=2.0, fy=2.0, interpolation=cv2.INTER_NEAREST)
cv2.imwrite(filename='./17-autoencoder.png', img=numpyOutput)
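
# the resulting image shows the first sample, the eleven interpolations, and
# the second sample side by side: 13 tiles of 28 pixels each, doubled by the
# nearest-neighbor resize to a 728x56 output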