# download from http://nlp.stanford.edu/data/glove.twitter.27B.zip
# WORD_VECTORS = "../embeddings/glove.twitter.27B.50d.txt"
from torch.utils.data import DataLoader
from modules.dataloaders import SentenceDataset
from utils.load_embeddings import load_word_vectors
########################################################
# PARAMETERS
########################################################
EMBEDDINGS = "../embeddings/glove.twitter.27B.50d.txt"
EMB_DIM = 50
BATCH_SIZE = 128
EPOCHS = 50
########################################################
# Define the datasets/dataloaders
########################################################
# 1 - load word embeddings
print("loading word embeddings...")
word2idx, idx2word, embeddings = load_word_vectors(EMBEDDINGS, EMB_DIM)
# you can load the raw data like this:
# train = load_semeval2017A("datasets/Semeval2017A/train_dev")
# val = load_semeval2017A("datasets/Semeval2017A/gold")
# 2 - define the datasets
# fill in the constructor arguments below; they depend on SentenceDataset's
# signature (e.g. the raw examples, labels and word2idx)
train_set = SentenceDataset(...)  # TODO: pass the training data
test_set = SentenceDataset(...)   # TODO: pass the test data
loader_train = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
loader_test = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)  # no need to shuffle the test set
#############################################################################
# Model Definition (Model, Loss Function, Optimizer)
#############################################################################
# define a simple model, loss function and optimizer
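# A minimal baseline sketch, not the exercise's required architecture: average
# the pretrained embeddings of each sentence and feed the mean vector to a
# single linear layer. It assumes a 3-class task (SemEval 2017 Task 4A uses
# positive/negative/neutral) and that each batch yields (inputs, labels,
# lengths); adapt this to whatever SentenceDataset actually returns.
import torch
import torch.nn as nn
import torch.optim as optim


class BaselineModel(nn.Module):
    def __init__(self, embeddings, num_classes=3):
        super().__init__()
        # initialize the embedding layer from the pretrained GloVe vectors
        # and freeze it (fine-tuning the embeddings is optional)
        self.embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(embeddings), freeze=True)
        self.fc = nn.Linear(EMB_DIM, num_classes)

    def forward(self, x, lengths):
        embs = self.embedding(x)                 # (batch, seq_len, emb_dim)
        # mean over the actual (non-padded) tokens of each sentence
        summed = embs.sum(dim=1)                 # (batch, emb_dim)
        means = summed / lengths.unsqueeze(-1).float()
        return self.fc(means)                    # (batch, num_classes)


model = BaselineModel(embeddings)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)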
#############################################################################
# Training Pipeline
#############################################################################
# loop over the dataset with the dataloader that you defined and train the
# model on each batch returned by the dataloader
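# A minimal training-loop sketch under the assumptions above (batches of
# (inputs, labels, lengths); adapt the unpacking to SentenceDataset's output).
for epoch in range(1, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    for inputs, labels, lengths in loader_train:
        optimizer.zero_grad()              # reset the gradients from the last step
        outputs = model(inputs, lengths)   # forward pass
        loss = criterion(outputs, labels)  # compute the loss for this batch
        loss.backward()                    # backpropagate
        optimizer.step()                   # update the model's parameters
        running_loss += loss.item()
    print("epoch {}: train loss = {:.4f}".format(
        epoch, running_loss / len(loader_train)))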