Hi,
When I try to load the GloVe vectors while training the LSTM, I get the following error:
AttributeError: 'dict' object has no attribute 'word_vec'
This happens when I try to train the LSTM with the following code:
def get_init_embedding(reverse_dict, embedding_size):
    print("Loading GloVe vectors...")
    with open("C:/Users/sensen/OneDrive - HERE Global B.V-/Desktop/NLP/glove.6B.300d_pickle", 'rb') as handle:
        word_vectors = pickle.load(handle)
    # loop through all the words in reverse_dict
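The rest of the function loops over reverse_dict and, I believe, calls word_vectors.word_vec(word) for each entry; that call is what raises the error, because word_vec is a method of gensim's KeyedVectors, while my pickle (built by the conversion script at the bottom of this issue) is a plain dict. A minimal sketch of the mismatch, with the path shortened for illustration:

import pickle

# Load the pickle produced by my conversion script (path shortened here)
with open("glove.6B.300d_pickle", 'rb') as handle:
    word_vectors = pickle.load(handle)

print(type(word_vectors))    # <class 'dict'>
print(word_vectors.keys())   # dict_keys(['embedding', 'word_dict'])

# A plain dict has no word_vec method, so this reproduces the error:
word_vectors.word_vec("the")  # AttributeError: 'dict' object has no attribute 'word_vec'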
Building model architecture
import tensorflow as tf
from tensorflow.contrib import rnn  # provides DropoutWrapper and LSTMStateTuple

class Model(object):
    def __init__(self, reversed_dict, article_max_len, summary_max_len, args, forward_only=False):
        self.vocabulary_size = len(reversed_dict)
        self.embedding_size = args.embedding_size
        self.num_hidden = args.num_hidden
        self.num_layers = args.num_layers
        self.learning_rate = args.learning_rate
        self.beam_width = args.beam_width
        if not forward_only:  # forward_only=False means training phase; keep_prob sets the dropout rate
            self.keep_prob = args.keep_prob
        else:
            self.keep_prob = 1.0
        self.cell = tf.nn.rnn_cell.BasicLSTMCell  # LSTM cell constructor
        with tf.variable_scope("decoder/projection"):
            # Projection layer used by the decoder in both training and testing;
            # it maps decoder hidden states to logits over the vocabulary.
            self.projection_layer = tf.layers.Dense(self.vocabulary_size, use_bias=False)
        # Placeholders: these become inputs fed in at run time
        self.batch_size = tf.placeholder(tf.int32, (), name="batch_size")
        self.X = tf.placeholder(tf.int32, [None, article_max_len])  # encoder input: article word indices
        self.X_len = tf.placeholder(tf.int32, [None])  # length of each article
        self.decoder_input = tf.placeholder(tf.int32, [None, summary_max_len])  # decoder input
        self.decoder_len = tf.placeholder(tf.int32, [None])  # decoder length
        self.decoder_target = tf.placeholder(tf.int32, [None, summary_max_len])  # decoder target
        self.global_step = tf.Variable(0, trainable=False)
        # EMBEDDING LAYER
        with tf.name_scope("embedding"):
            if not forward_only and args.glove:  # training phase with GloVe vectors
                # Constant because the pretrained embeddings are not updated during training;
                # get_init_embedding returns the vector for each word in our dict.
                init_embeddings = tf.constant(get_init_embedding(reversed_dict, self.embedding_size), dtype=tf.float32)
            else:
                # No pretrained embeddings available (e.g. testing phase): initialize randomly
                init_embeddings = tf.random_uniform([self.vocabulary_size, self.embedding_size], -1.0, 1.0)
            self.embeddings = tf.get_variable("embeddings", initializer=init_embeddings)
            # Transpose to time-major shape [max_len, batch_size, embedding_size]
            self.encoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(self.embeddings, self.X), perm=[1, 0, 2])  # encoder input
            self.decoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(self.embeddings, self.decoder_input), perm=[1, 0, 2])  # decoder input
        with tf.name_scope("encoder"):
            fw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
            bw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
            fw_cells = [rnn.DropoutWrapper(cell) for cell in fw_cells]
            bw_cells = [rnn.DropoutWrapper(cell) for cell in bw_cells]

            encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                fw_cells, bw_cells, self.encoder_emb_inp,
                sequence_length=self.X_len, time_major=True, dtype=tf.float32)
            self.encoder_output = tf.concat(encoder_outputs, 2)
            encoder_state_c = tf.concat((encoder_state_fw[0].c, encoder_state_bw[0].c), 1)
            encoder_state_h = tf.concat((encoder_state_fw[0].h, encoder_state_bw[0].h), 1)
            self.encoder_state = rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)
with tf.name_scope("decoder"), tf.variable_scope("decoder") as decoder_scope:
decoder_cell = self.cell(self.num_hidden * 2)
if not forward_only:
attention_states = tf.transpose(self.encoder_output, [1, 0, 2])
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
self.num_hidden * 2, attention_states, memory_sequence_length=self.X_len, normalize=True)
decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
attention_layer_size=self.num_hidden * 2)
initial_state = decoder_cell.zero_state(dtype=tf.float32, batch_size=self.batch_size)
initial_state = initial_state.clone(cell_state=self.encoder_state)
helper = tf.contrib.seq2seq.TrainingHelper(self.decoder_emb_inp, self.decoder_len, time_major=True)
decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, initial_state)
outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=True, scope=decoder_scope)
self.decoder_output = outputs.rnn_output
self.logits = tf.transpose(
self.projection_layer(self.decoder_output), perm=[1, 0, 2])
self.logits_reshape = tf.concat(
[self.logits, tf.zeros([self.batch_size, summary_max_len - tf.shape(self.logits)[1], self.vocabulary_size])], axis=1)
            else:
                tiled_encoder_output = tf.contrib.seq2seq.tile_batch(
                    tf.transpose(self.encoder_output, perm=[1, 0, 2]), multiplier=self.beam_width)
                tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(self.encoder_state, multiplier=self.beam_width)
                tiled_seq_len = tf.contrib.seq2seq.tile_batch(self.X_len, multiplier=self.beam_width)
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.num_hidden * 2, tiled_encoder_output, memory_sequence_length=tiled_seq_len, normalize=True)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
                                                                   attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(dtype=tf.float32, batch_size=self.batch_size * self.beam_width)
                initial_state = initial_state.clone(cell_state=tiled_encoder_final_state)
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=self.embeddings,
                    start_tokens=tf.fill([self.batch_size], tf.constant(2)),  # index of <s>
                    end_token=tf.constant(3),  # index of </s>
                    initial_state=initial_state,
                    beam_width=self.beam_width,
                    output_layer=self.projection_layer
                )
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder, output_time_major=True, maximum_iterations=summary_max_len, scope=decoder_scope)
                self.prediction = tf.transpose(outputs.predicted_ids, perm=[1, 2, 0])
with tf.name_scope("loss"):
if not forward_only:
crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=self.logits_reshape, labels=self.decoder_target)
weights = tf.sequence_mask(self.decoder_len, summary_max_len, dtype=tf.float32)
self.loss = tf.reduce_sum(crossent * weights / tf.to_float(self.batch_size))
params = tf.trainable_variables()
gradients = tf.gradients(self.loss, params)
clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
optimizer = tf.train.AdamOptimizer(self.learning_rate)
self.update = optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step)
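For reference, the tf.constant(...) call in the embedding layer expects get_init_embedding to return a numpy array of shape [vocabulary_size, embedding_size], with row i holding the vector for the word reverse_dict[i]. Here is a minimal sketch of building such an array from my pickled dict; the lookup and the zero vector for unknown words are my assumptions, not the original function:

import numpy as np
import pickle

def get_init_embedding_from_pickle(reverse_dict, embedding_size, pickle_path):
    """Build a [vocabulary_size, embedding_size] float32 matrix aligned with reverse_dict."""
    with open(pickle_path, 'rb') as handle:
        pickled = pickle.load(handle)
    glove_matrix = pickled['embedding']  # [num_glove_words, 300] numpy array
    glove_dict = pickled['word_dict']    # word -> row index into glove_matrix

    word_vec_list = []
    for _, word in sorted(reverse_dict.items()):  # reverse_dict maps index -> word
        if word in glove_dict:
            word_vec_list.append(glove_matrix[glove_dict[word]])
        else:
            word_vec_list.append(np.zeros(embedding_size, dtype=np.float32))  # unknown word
    return np.array(word_vec_list, dtype=np.float32)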
Training
import time
import tensorflow as tf
import argparse
import pickle
import os

start = time.perf_counter()
class args:
    pass

args.num_hidden = 150
args.num_layers = 2
args.beam_width = 10
args.glove = "store_true"
args.embedding_size = 300
args.learning_rate = 1e-3
args.batch_size = 64
args.num_epochs = 10
args.keep_prob = 0.8
args.toy = False  # "store_true"
args.with_model = "store_true"

if not os.path.exists("saved_model"):
    os.mkdir("saved_model")
else:
    if args.with_model:
        # read the latest checkpoint name from TF's "checkpoint" file
        old_model_checkpoint_path = open('saved_model/checkpoint', 'r')
        old_model_checkpoint_path = "".join(["saved_model/", old_model_checkpoint_path.read().splitlines()[0].split('"')[1]])

print("Building dictionary...")
word_dict, reverse_dict, article_max_len, summary_max_len = build_dict("train", args.toy)
print("Loading training dataset...")
train_x, train_y = build_dataset("train", word_dict, article_max_len, summary_max_len, args.toy)

tf.reset_default_graph()
with tf.Session() as sess:
    model = Model(reverse_dict, article_max_len, summary_max_len, args)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())
    if 'old_model_checkpoint_path' in globals():
        print("Continuing from previously trained model:", old_model_checkpoint_path, "...")
        saver.restore(sess, old_model_checkpoint_path)

    batches = batch_iter(train_x, train_y, args.batch_size, args.num_epochs)
    num_batches_per_epoch = (len(train_x) - 1) // args.batch_size + 1
    print("\nIteration starts.")
    print("Number of batches per epoch :", num_batches_per_epoch)

    for batch_x, batch_y in batches:
        batch_x_len = list(map(lambda x: len([y for y in x if y != 0]), batch_x))
        batch_decoder_input = list(map(lambda x: [word_dict["<s>"]] + list(x), batch_y))
        batch_decoder_len = list(map(lambda x: len([y for y in x if y != 0]), batch_decoder_input))
        batch_decoder_output = list(map(lambda x: list(x) + [word_dict["</s>"]], batch_y))
        batch_decoder_input = list(
            map(lambda d: d + (summary_max_len - len(d)) * [word_dict["<padding>"]], batch_decoder_input))
        batch_decoder_output = list(
            map(lambda d: d + (summary_max_len - len(d)) * [word_dict["<padding>"]], batch_decoder_output))

        train_feed_dict = {
            model.batch_size: len(batch_x),
            model.X: batch_x,
            model.X_len: batch_x_len,
            model.decoder_input: batch_decoder_input,
            model.decoder_len: batch_decoder_len,
            model.decoder_target: batch_decoder_output
        }

        _, step, loss = sess.run([model.update, model.global_step, model.loss], feed_dict=train_feed_dict)

        if step % 1000 == 0:
            print("step {0}: loss = {1}".format(step, loss))

        if step % num_batches_per_epoch == 0:
            hours, rem = divmod(time.perf_counter() - start, 3600)
            minutes, seconds = divmod(rem, 60)
            saver.save(sess, "C:/Users/sensen/OneDrive - HERE Global B.V-/Desktop/NLP/Open source libraries/Text summarization", global_step=step)
            print("Epoch {0}: Model is saved.".format(step // num_batches_per_epoch),
                  "Elapsed: {:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds), "\n")
I converted the downloaded GloVe txt file into a pickle using the following code:

import pickle
import numpy as np

f = open('C:/Users/sensen/OneDrive - HERE Global B.V-/Desktop/NLP/Open source libraries/Text summarization/glove.6B/glove.6B.300d.txt', 'r', encoding='UTF-8')
g = open('glove.6B.300d_pickle', 'wb')

word_dict = {}
wordvec = []
for idx, line in enumerate(f.readlines()):
    word_split = line.split(' ')
    word = word_split[0]
    word_dict[word] = idx
    d = word_split[1:]
    d[-1] = d[-1][:-1]  # strip the trailing newline from the last value
    d = [float(e) for e in d]
    wordvec.append(d)

embedding = np.array(wordvec)
pickling = {'embedding': embedding, 'word_dict': word_dict}
pickle.dump(pickling, g)
f.close()
g.close()
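After running this, the pickle contents can be sanity-checked like so (the expected shape assumes the standard glove.6B.300d.txt with 400,000 words, where 'the' is the first line):

import pickle

with open('glove.6B.300d_pickle', 'rb') as handle:
    pickled = pickle.load(handle)

print(pickled.keys())               # dict_keys(['embedding', 'word_dict'])
print(pickled['embedding'].shape)   # (400000, 300)
print(pickled['word_dict']['the'])  # 0, since 'the' is the first word in the file

# Looking up a vector goes through the dict, not a word_vec() method:
vec = pickled['embedding'][pickled['word_dict']['the']]  # 300-d numpy array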
Can you help me solve the error?