We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
I'm trying to implement this paper in PyTorch, but I don't know how to convert this part:

```python
with tf.variable_scope("decoder_train") as decoder_scope:
    if decoder_layer == 2:
        train_decoder_init_state = (
            tf.concat([self.z_sample, ori_encoder_state[0], emoji_vec], axis=1),
            tf.concat([self.z_sample, ori_encoder_state[1], emoji_vec], axis=1)
        )
        dim = latent_dim + num_unit + emoji_dim
        cell = tf.nn.rnn_cell.MultiRNNCell(
            [create_rnn_cell(dim, 2, cell_type, num_gpu, self.dropout),
             create_rnn_cell(dim, 3, cell_type, num_gpu, self.dropout)])
    else:
        train_decoder_init_state = tf.concat([self.z_sample, ori_encoder_state_flat, emoji_vec], axis=1)
        dim = latent_dim + 2 * num_unit + emoji_dim
        cell = create_rnn_cell(dim, 2, cell_type, num_gpu, self.dropout)

    with tf.variable_scope("attention"):
        memory = tf.concat([ori_encoder_output[0], ori_encoder_output[1]], axis=2)
        memory = tf.transpose(memory, [1, 0, 2])
        attention_mechanism = seq2seq.LuongAttention(
            dim, memory, memory_sequence_length=self.ori_len, scale=True)
        # attention_mechanism = seq2seq.BahdanauAttention(
        #     num_unit, memory, memory_sequence_length=self.ori_len)
        decoder_cell = seq2seq.AttentionWrapper(
            cell, attention_mechanism,
            attention_layer_size=dim)  # TODO: add_name; what atten layer size means
        # decoder_cell = cell

    helper = seq2seq.TrainingHelper(
        rep_input_emb, self.rep_len + 1, time_major=True)
    projection_layer = layers_core.Dense(
        vocab_size, use_bias=False, name="output_projection")
    decoder = seq2seq.BasicDecoder(
        decoder_cell, helper,
        decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=train_decoder_init_state),
        output_layer=projection_layer)
    train_outputs, _, _ = seq2seq.dynamic_decode(
        decoder, output_time_major=True, swap_memory=True, scope=decoder_scope)
    self.logits = train_outputs.rnn_output

with tf.variable_scope("decoder_infer") as decoder_scope:
    # normal_sample = tf.random_normal(shape=(batch_size, latent_dim))
    if decoder_layer == 2:
        infer_decoder_init_state = (
            tf.concat([self.q_z_sample, ori_encoder_state[0], emoji_vec], axis=1),
            tf.concat([self.q_z_sample, ori_encoder_state[1], emoji_vec], axis=1)
        )
    else:
        infer_decoder_init_state = tf.concat([self.q_z_sample, ori_encoder_state_flat, emoji_vec], axis=1)

    start_tokens = tf.fill([batch_size], start_i)
    end_token = end_i

    if beam_width > 0:
        infer_decoder_init_state = seq2seq.tile_batch(
            infer_decoder_init_state, multiplier=beam_width)
        decoder = seq2seq.BeamSearchDecoder(
            cell=decoder_cell,
            embedding=embedding.coder,
            start_tokens=start_tokens,
            end_token=end_token,
            initial_state=decoder_cell.zero_state(
                batch_size * beam_width, tf.float32).clone(cell_state=infer_decoder_init_state),
            beam_width=beam_width,
            output_layer=projection_layer,
            length_penalty_weight=0.0)
    else:
        helper = seq2seq.GreedyEmbeddingHelper(
            embedding.coder, start_tokens, end_token)
        decoder = seq2seq.BasicDecoder(
            decoder_cell, helper,
            decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=infer_decoder_init_state),
            output_layer=projection_layer  # applied per timestep
        )

    # Dynamic decoding
    infer_outputs, _, infer_lengths = seq2seq.dynamic_decode(
        decoder, maximum_iterations=maximum_iterations,
        output_time_major=True, swap_memory=True, scope=decoder_scope)
    if beam_width > 0:
        self.result = infer_outputs.predicted_ids
    else:
        self.result = infer_outputs.sample_id
    self.result_lengths = infer_lengths

with tf.variable_scope("loss"):
    max_time = tf.shape(self.rep_output)[0]
    with tf.variable_scope("reconstruction"):
        # TODO: use inference decoder's logits to compute recon_loss
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(  # ce = [len, batch_size]
            labels=self.rep_output, logits=self.logits)  # rep: [len, batch_size]; logits: [len, batch_size, vocab_size]
        target_mask = tf.sequence_mask(
            self.rep_len + 1, max_time, dtype=self.logits.dtype)  # time_major
        target_mask_t = tf.transpose(target_mask)  # max_len batch_size
        self.recon_losses = tf.reduce_sum(cross_entropy * target_mask_t, axis=0)
        self.recon_loss = tf.reduce_sum(cross_entropy * target_mask_t) / batch_size

    with tf.variable_scope("latent"):
        # without prior network
        # self.kl_loss = 0.5 * tf.reduce_sum(tf.exp(self.log_var) + self.mu ** 2 - 1. - self.log_var, 0)
        self.kl_losses = 0.5 * tf.reduce_sum(
            tf.exp(self.log_var - self.p_log_var) +
            (self.mu - self.p_mu) ** 2 / tf.exp(self.p_log_var) - 1. -
            self.log_var + self.p_log_var,
            axis=1)
        self.kl_loss = tf.reduce_mean(self.kl_losses)

    with tf.variable_scope("bow"):
        # self.bow_loss = self.kl_weight * 0
        mlp_b = layers_core.Dense(
            vocab_size, use_bias=False, name="MLP_b")
        # is it a mistake that we only model on latent variable?
        latent_logits = mlp_b(tf.concat(
            [self.z_sample, ori_encoder_state_flat, emoji_vec], axis=1))  # [batch_size, vocab_size]
        latent_logits = tf.expand_dims(latent_logits, 0)  # [1, batch_size, vocab_size]
        latent_logits = tf.tile(latent_logits, [max_time, 1, 1])  # [max_time, batch_size, vocab_size]
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(  # ce = [len, batch_size]
            labels=self.rep_output, logits=latent_logits)
        self.bow_losses = tf.reduce_sum(cross_entropy * target_mask_t, axis=0)
        self.bow_loss = tf.reduce_sum(cross_entropy * target_mask_t) / batch_size
```
The text was updated successfully, but these errors were encountered:
No branches or pull requests
I'm trying to implement this paper in PyTorch, but I don't know how to convert this part ` with tf.variable_scope("decoder_train") as decoder_scope:
if decoder_layer == 2:
train_decoder_init_state = (
tf.concat([self.z_sample, ori_encoder_state[0], emoji_vec], axis=1),
tf.concat([self.z_sample, ori_encoder_state[1], emoji_vec], axis=1)
)
dim = latent_dim + num_unit + emoji_dim
cell = tf.nn.rnn_cell.MultiRNNCell(
[create_rnn_cell(dim, 2, cell_type, num_gpu, self.dropout),
create_rnn_cell(dim, 3, cell_type, num_gpu, self.dropout)])
else:
train_decoder_init_state = tf.concat([self.z_sample, ori_encoder_state_flat, emoji_vec], axis=1)
dim = latent_dim + 2 * num_unit + emoji_dim
cell = create_rnn_cell(dim, 2, cell_type, num_gpu, self.dropout)
`
The text was updated successfully, but these errors were encountered: