# imdb-embedding.py
# GloVe vectors must first be downloaded from http://nlp.stanford.edu/data/glove.6B.zip:
#   wget http://nlp.stanford.edu/data/glove.6B.zip
#   unzip glove.6B.zip
# (an optional sketch of seeding the Embedding layer with these vectors
# appears after the preprocessing step below)
import numpy as np
import wandb
from wandb.keras import WandbCallback
from keras.preprocessing import sequence
from keras.preprocessing import text
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Embedding, LSTM
# local imdb.py helper that returns the raw review texts; this (not
# keras.datasets.imdb, which the original imported and then shadowed)
# is what load_imdb() below comes from
import imdb
wandb.init()
config = wandb.config
# set parameters (filters/kernel_size removed: no Conv1D layer is used):
config.vocab_size = 1000
config.maxlen = 300
config.batch_size = 32
config.embedding_dims = 50
config.hidden_dims = 100
config.epochs = 10
# load the raw IMDB review texts and binary sentiment labels
(X_train, y_train), (X_test, y_test) = imdb.load_imdb()
y_train = np.array(y_train)  # ensure labels are numpy arrays for Keras
y_test = np.array(y_test)

# map each review to a fixed-length sequence of word indices
# (texts_to_sequences, not texts_to_matrix: the Embedding layer expects
# integer word indices, not a bag-of-words matrix)
tokenizer = text.Tokenizer(num_words=config.vocab_size)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)
X_train = sequence.pad_sequences(X_train, maxlen=config.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=config.maxlen)
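
# Optional sketch (not part of the original script): seed the Embedding layer
# with the pretrained GloVe vectors downloaded at the top. Assumes
# glove.6B.50d.txt sits in the working directory and that its dimensionality
# matches config.embedding_dims. To use it, pass weights=[embedding_matrix]
# (and optionally trainable=False) to the Embedding layer below.
# embeddings_index = {}
# with open('glove.6B.50d.txt') as f:
#     for line in f:
#         values = line.split()
#         embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')
# embedding_matrix = np.zeros((config.vocab_size, config.embedding_dims))
# for word, i in tokenizer.word_index.items():
#     if i < config.vocab_size and word in embeddings_index:
#         embedding_matrix[i] = embeddings_index[word]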
## create model: a learned word embedding feeding an LSTM classifier
model = Sequential()
model.add(Embedding(config.vocab_size, config.embedding_dims,
                    input_length=config.maxlen))
model.add(LSTM(config.hidden_dims, activation="sigmoid"))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
model.fit(X_train, y_train,
          batch_size=config.batch_size,
          epochs=config.epochs,
          validation_data=(X_test, y_test),
          callbacks=[WandbCallback()])
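
# Follow-up sketch (not in the original script): report final held-out
# metrics after training; evaluate returns [loss, accuracy] since the model
# was compiled with metrics=['accuracy'].
loss, acc = model.evaluate(X_test, y_test, batch_size=config.batch_size)
print('test loss: %.4f, test accuracy: %.4f' % (loss, acc))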