import matplotlib.pyplot as plt
import numpy as np
import torch
from time import time
from random import randint
from torch import nn, optim
from model import NeuralNetwork
from torch.utils.data import DataLoader
from dataset import ASLDataset
from torchvision import transforms
from utils import print_data, yesno, set_seeds, predict_by_max_logit, compute_accuracy_from_predictions
from plot import make_loss_plot, view_classify
# print_data("./training/data.csv") # Print all the training data formatted
def train(epochs=10):
    alphabet = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M",
                "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y"]
    set_seeds(seed=randint(0, 9999))  # seed the RNGs; pass a fixed value instead of randint to make runs repeatable
    # number of examples to process at one time; the training set (50,000 examples) is too large to process all at once
    batch_size = 44
    transform = transforms.Compose(
        [
            # ToTensor maps pixel values from the 0-255 range to 0-1 and makes a PyTorch tensor
            transforms.ToTensor(),
            # Normalize then maps the tensor values from the 0-1 range to the -1 to 1 range
            transforms.Normalize((0.5,), (0.5,)),
        ]
    )
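    # Normalize computes (x - mean) / std, so a white pixel (255) becomes
    # (1.0 - 0.5) / 0.5 = 1.0 and a black pixel (0) becomes (0.0 - 0.5) / 0.5 = -1.0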
    train_set = ASLDataset("./training/data.csv", transform=transform)
    test_set = ASLDataset("./testing/data.csv", transform=transform)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    iterator = iter(train_loader)
    signs, labels = next(iterator)
    print(signs.shape)
    print(labels.shape)
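    # expected shapes (assuming 28 x 28 grayscale inputs): signs is [44, 1, 28, 28], labels is [44]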
    # display a single example with its letter label
    img = signs[0].squeeze()
    label = labels[0]
    plt.imshow(img, cmap="gray")
    plt.show()
    print(f"Label: {alphabet[label]}")
    figure = plt.figure()
    num_of_images = 40
    for index in range(1, num_of_images + 1):
        plt.subplot(4, 10, index).set_title(f"{alphabet[labels[index]]}")
        plt.axis('off')
        plt.imshow(signs[index].numpy().squeeze(), cmap='gray')
    plt.show()
    # define the network: 3 linear layers with ReLU activations (the architecture lives in
    # model.py; the sizes below document it but are not passed to the constructor)
    input_size = 784  # 28 x 28 pixels, flattened
    hidden_sizes = [128, 64]  # sizes of the 2 hidden layers
    output_size = 24  # one output per letter
    # TODO: Increase the accuracy of the model. Right now it's about 4.30%.
    model = NeuralNetwork()
    print("Number of model parameters = {}".format(sum(p.numel() for p in model.parameters())))
    # cross entropy loss, which expects raw logits and integer class labels
    loss_fn = nn.CrossEntropyLoss()
    # the optimizer: stochastic gradient descent with momentum
    optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
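    # PyTorch's momentum SGD update: buf = momentum * buf + grad, then param -= lr * buf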
    # train the model
    time_start = time()  # set a timer
    training_losses = []
    for e in range(epochs):
        epoch_losses = []
        for signs, labels in train_loader:
            logits = model(signs)  # pass the features forward through the model
            loss = loss_fn(logits, labels)  # compute the loss
            epoch_losses.append(loss.item())  # store the scalar loss, detached from the graph
            optimizer.zero_grad()  # clear the gradients
            loss.backward()  # compute the gradients via backpropagation
            optimizer.step()  # update the weights using the gradients
        epoch_loss = np.mean(epoch_losses)
        training_losses.append(epoch_loss)
        print("Epoch {} - Loss: {}".format(e + 1, epoch_loss))
    print("\nTraining Time (in minutes) = {}".format((time() - time_start) / 60))
    # plot the loss vs epoch
    make_loss_plot(epochs, training_losses)
    # get the first batch of test examples so we can examine them
    iterator = iter(test_loader)
    signs, labels = next(iterator)

    # display an image with the predicted class probabilities
    with torch.no_grad():  # turn off gradients to speed up this part
        prob = torch.softmax(model(signs[0].unsqueeze(dim=0)), dim=-1)
    # the network outputs raw logits; softmax converts them to probabilities
    probability = list(prob.numpy()[0])
    predicted = probability.index(max(probability))
    print("Predicted letter =", alphabet[predicted])
    view_classify(signs[0].view(1, 28, 28), prob, alphabet)
    # compute accuracy on the test set
    predictions = []
    labels_test = []
    with torch.no_grad():  # don't need gradients for testing
        for images, labels in test_loader:
            labels_test.append(labels)
            logits = model(images)
            # predict the class with the highest logit; softmax is monotonic,
            # so argmax over logits equals argmax over probabilities
            predictions.append(predict_by_max_logit(logits))
    print("Accuracy = {0:0.1f}%".format(
        compute_accuracy_from_predictions(torch.hstack(predictions), torch.hstack(labels_test)) * 100.0))
    if yesno("Training Complete! Do you want to save the model?"):
        print("Saving the model...")
        # read the current model version, then write back the incremented version
        with open("./models/modelVersion.txt", "r") as f:
            v = int(f.read()) + 1
        with open("./models/modelVersion.txt", "w") as f:
            f.write(str(v))
        torch.save(model.state_dict(), f"./models/SLT-ASLv{v}")
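

if __name__ == "__main__":
    # minimal entry-point sketch, assuming the script is meant to be run directly
    train()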