Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

my result is very bad #2

Open
diweiqiang opened this issue Apr 30, 2019 · 3 comments
Open

my result is very bad #2

diweiqiang opened this issue Apr 30, 2019 · 3 comments

Comments

@diweiqiang
Copy link

image
the reconstruction loss is very high as you can see in the image, but I don't know why

@diweiqiang
Copy link
Author

my code is here
even if i comment the "flag = False", the result is still the same except the run time is much longer
"""
代码来源:https://github.com/mmeendez8/Autoencoder
配套讲解:https://medium.com/@miguelmendez_/vaes-i-generating-images-with-tensorflow-f81b2f1c63b0
"""
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
import seaborn as sns
from matplotlib.colors import ListedColormap

sns.set_style(style='white')

# Network parameters, exposed as command-line flags.
tf.flags.DEFINE_float(name='learning_rate', default=.0005, help='Initial learning rate.')
tf.flags.DEFINE_integer(name='epochs', default=20, help='Number of steps to run trainer.')
tf.flags.DEFINE_integer(name='batch_size', default=128, help='Minibatch size')
tf.flags.DEFINE_integer(name='latent_dim', default=2, help='Number of latent dimensions')
tf.flags.DEFINE_integer(name='test_image_number', default=5,
                        help='Number of test images to recover during training')
tf.flags.DEFINE_integer(name='inputs_decoder', default=49, help='Size of decoder input layer')
tf.flags.DEFINE_string(name='dataset', default='mnist', help='Dataset name [mnist, fashion-mnist]')
tf.flags.DEFINE_string(name='logdir', default='./logs', help='Logs folder')
tf.flags.DEFINE_bool(name='plot_latent', default=True, help='Plot latent space')

# Single access point for every flag value used below.
FLAGS = tf.flags.FLAGS

# Define and create results folders: Results/<dataset>/Test and Results/<dataset>/Train.
results_folder = os.path.join('Results', FLAGS.dataset)
for folder in ('Test', 'Train'):
    folder_path = os.path.join(results_folder, folder)
    # Only create the folder when it is missing so reruns reuse the existing one.
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

# Empty log folder (best effort).
try:
    if os.listdir(FLAGS.logdir):
        # rmtree removes the log directory itself, not just its contents.
        # Reference: https://blog.csdn.net/HappyRocking/article/details/79806808
        shutil.rmtree(FLAGS.logdir)
except OSError:
    # The folder does not exist yet or cannot be removed; this cleanup is
    # optional, so carry on with whatever is there.
    pass

# Get data: any dataset name other than 'mnist' selects fashion-mnist.
if FLAGS.dataset == 'mnist':
    data = keras.datasets.mnist
else:
    data = keras.datasets.fashion_mnist

# test_images has shape (10000, 28, 28).
(train_images, train_labels), (test_images, test_labels) = data.load_data()

# Create tf dataset
with tf.variable_scope("DataPipe"):
    # Background reading on the tf.data input pipeline:
    #   https://zhuanlan.zhihu.com/p/37384376?utm_source=ZHShareTargetIDMore&utm_medium=social&utm_oi=610739698650583040
    #   https://blog.csdn.net/qi_1221/article/details/79460875
    #   https://www.jianshu.com/p/f580f4fc2ba0
    # Each dataset element is a single image of shape (28, 28).
    dataset = tf.data.Dataset.from_tensor_slices(train_images)

    # See `map` in https://www.tensorflow.org/guide/datasets?hl=zh-cn
    # Per the official docs, convert_image_dtype rescales the pixel values into [0, 1].
    dataset = dataset.map(lambda image: tf.image.convert_image_dtype([image], dtype=tf.float32))

    # For the role of prefetch see the explanation of this code in
    # https://towardsdatascience.com/vaes-generating-images-with-tensorflow-61de08e82f1f
    dataset = dataset.batch(batch_size=FLAGS.batch_size)
    dataset = dataset.prefetch(FLAGS.batch_size)

    # Reference: https://www.jiqizhixin.com/articles/03137
    # The iterator must be (re)initialised explicitly before each pass over the data.
    iterator = dataset.make_initializable_iterator()
    input_batch = tf.reshape(iterator.get_next(), shape=[-1, 28, 28, 1])

def encoder(X):
    """Build the encoder graph and sample a latent code for every input image.

    Args:
        X: Image batch tensor fed to the first convolution (this script passes
            a tensor reshaped to [-1, 28, 28, 1]).

    Returns:
        A tuple ``(z, mean_, std_dev)`` where ``mean_`` and ``std_dev`` are the
        outputs of two dense layers with ``FLAGS.latent_dim`` units each and
        ``z`` is the sample ``mean_ + epsilon * std_dev``.
    """
    relu = tf.nn.relu
    with tf.variable_scope("Encoder"):
        # See the encoder section of the accompanying article: a convolution
        # with a stride of 2 and 'same' padding halves the image dimensions.
        # The first two layers downsample, the third keeps the size.
        features = X
        for stride in (2, 2, 1):
            features = tf.layers.conv2d(features, filters=64, kernel_size=4, strides=stride,
                                        padding='same', activation=relu)
        features = tf.layers.flatten(features)

        # Local latent variables
        mean_ = tf.layers.dense(features, units=FLAGS.latent_dim, name='mean')
        # softplus forces the standard deviation to be > 0.
        raw_std = tf.layers.dense(features, units=FLAGS.latent_dim)
        std_dev = tf.nn.softplus(raw_std, name='std_dev')

        # Reparametrization trick: draw one standard-normal noise vector per
        # batch row, then scale and shift it with the predicted statistics.
        noise_shape = tf.stack([tf.shape(features)[0], FLAGS.latent_dim])
        epsilon = tf.random_normal(noise_shape, name='epsilon')
        z = mean_ + epsilon * std_dev

        return z, mean_, std_dev

def decoder(z):
    """Build the decoder graph that maps latent codes back to images.

    Args:
        z: Latent code tensor (this script passes the encoder sample, one row
            of ``FLAGS.latent_dim`` values per image).

    Returns:
        Tensor reshaped to [-1, 28, 28, 1]; values come from a sigmoid and
        therefore lie between 0 and 1.

    Raises:
        ValueError: If ``FLAGS.inputs_decoder`` is not a perfect square, since
            the dense output could not be reshaped into a square feature map.
    """
    activation = tf.nn.relu

    # FLAGS.inputs_decoder is 49 by default, giving a 7x7 map.
    recovered_size = int(np.sqrt(FLAGS.inputs_decoder))
    if recovered_size * recovered_size != FLAGS.inputs_decoder:
        # Without this check the reshape below would either fail at run time
        # or silently change the batch dimension.
        raise ValueError(
            'inputs_decoder must be a perfect square, got {}'.format(FLAGS.inputs_decoder))

    with tf.variable_scope("Decoder"):
        # From the accompanying article: it is common to apply some non-linear
        # transformations using dense layers before the transposed ones.
        x = tf.layers.dense(z, units=FLAGS.inputs_decoder, activation=activation)
        # x has shape (?, inputs_decoder): still flat at this point.
        x = tf.layers.dense(x, units=FLAGS.inputs_decoder, activation=activation)
        # Reshape the flat vector back into an image-like map so it can be convolved.
        x = tf.reshape(x, [-1, recovered_size, recovered_size, 1])

        # Stride 1 with 'same' padding keeps the spatial size of the map; the
        # expansion to 28 * 28 values is done by the dense layers below.
        for _ in range(3):
            x = tf.layers.conv2d_transpose(x, filters=64, kernel_size=4, strides=1,
                                           padding='same', activation=activation)

        # Same flatten op as the encoder, instead of the tf.contrib variant.
        x = tf.layers.flatten(x)
        x = tf.layers.dense(x, units=28 * 28, activation=None)

        # Sigmoid keeps every pixel between 0 and 1 so it can be compared with
        # the normalised input through cross-entropy.
        x = tf.layers.dense(x, units=28 * 28, activation=tf.nn.sigmoid)
        img = tf.reshape(x, shape=[-1, 28, 28, 1])
        return img

# Link encoder and decoder
# mean_ has shape (?, FLAGS.latent_dim), i.e. (?, 2) with the default flags.
z, mean_, std_dev = encoder(input_batch)
output = decoder(z)

# Reshape input and output to flat vectors
flat_output = tf.reshape(output, [-1, 28 * 28])
flat_input = tf.reshape(input_batch, [-1, 28 * 28])

with tf.name_scope('loss'):
    # Small constant added inside every logarithm: a saturated sigmoid output
    # (exactly 0 or 1) or a vanishing variance would otherwise give log(0),
    # turning the loss into inf/NaN.
    log_eps = 1e-7

    # Input and output both lie between 0 and 1, so per-pixel binary
    # cross-entropy can be used as the reconstruction loss.
    img_loss = tf.reduce_sum(
        flat_input * -tf.log(flat_output + log_eps)
        + (1 - flat_input) * -tf.log(1 - flat_output + log_eps), 1)
    # KL divergence between N(mean_, std_dev^2) and the standard normal,
    # see equation 4 of https://zhuanlan.zhihu.com/p/34998569
    latent_loss = 0.5 * tf.reduce_sum(
        tf.square(mean_) + tf.square(std_dev) - tf.log(tf.square(std_dev) + log_eps) - 1, 1)
    loss = tf.reduce_mean(img_loss + latent_loss)
    tf.summary.scalar('batch_loss', loss)

optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)

init_vars = [tf.local_variables_initializer(), tf.global_variables_initializer()]
# Let TensorFlow grow its GPU memory allocation on demand.
gpu_options = tf.GPUOptions(allow_growth=True)

# Training loop
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    # Write summaries to the configured log folder (defaults to './logs').
    writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)

    sess.run(init_vars)
    merged_summary_op = tf.summary.merge_all()

    for epoch in range(FLAGS.epochs):
        # Rewind the dataset iterator at the start of every epoch.
        sess.run(iterator.initializer)
        print('Actual epoch: {}'.format(epoch))

        flag = True  # Show only first batch of epoch

        while True:
            try:
                sess.run(optimizer)
                if flag:
                    # Get input and recover output images comparison.
                    # NOTE: evaluating input_batch here draws the next batch
                    # from the iterator, not the one used by the step above.
                    summ, target, output_ = sess.run([merged_summary_op, input_batch, output])
                    f, axarr = plt.subplots(FLAGS.test_image_number, 2)

                    for j in range(FLAGS.test_image_number):
                        # Column 0 shows the input image, column 1 its reconstruction.
                        for pos, im in enumerate([target, output_]):
                            # target and output_ each hold a whole batch; only the
                            # first FLAGS.test_image_number images are drawn.
                            axarr[j, pos].imshow(im[j].reshape((28, 28)), cmap='gray')
                            axarr[j, pos].axis('off')

                    plt.savefig(os.path.join(results_folder, 'Train/Epoch_{}'.format(epoch)))
                    plt.close(f)
                    flag = False
                    writer.add_summary(summ, epoch)

                    # Create artificial image from unit norm sample
                    artificial_image = sess.run(
                        output, feed_dict={z: np.random.normal(0, 1, (1, FLAGS.latent_dim))})
                    plt.figure()
                    with sns.axes_style("white"):
                        plt.imshow(artificial_image[0].reshape((28, 28)), cmap='gray')
                    plt.savefig(os.path.join(results_folder, 'Test/{}'.format(epoch)))
                    plt.close()

                    # Create plot of latent space (only if latent dimensions are 2)
                    if FLAGS.latent_dim == 2 and FLAGS.plot_latent:
                        # Add a channel axis and scale to [0, 1] so the test
                        # images match the form of input_batch.
                        coords = sess.run(z, feed_dict={input_batch: test_images[..., np.newaxis]/255.})
                        colormap = ListedColormap(sns.color_palette(sns.hls_palette(10, l=.45, s=.8)).as_hex())
                        plt.scatter(coords[:, 0], coords[:, 1], c=test_labels, cmap=colormap)

                        cbar = plt.colorbar()
                        if FLAGS.dataset == 'fashion-mnist':
                            cbar.ax.set_yticklabels(['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
                                                     'Shirt', 'Sneaker', 'Bag', 'Ankle boot'])

                        # plt.axis('off')
                        plt.title('Latent space')
                        plt.savefig(os.path.join(results_folder, 'Test/Latent_{}'.format(epoch)))
                        plt.close()

            except tf.errors.OutOfRangeError:
                # The iterator is exhausted: this epoch is finished.
                break

        # The grid below feeds two-component latent vectors into z, so it is
        # only valid when the latent space is two-dimensional.
        if FLAGS.latent_dim == 2:
            # Create mesh grid of values: decode a regular grid of latent
            # points to visualise what each region of the latent space generates.
            values = np.arange(-3, 4, .5)
            xx, yy = np.meshgrid(values, values)
            input_holder = np.zeros((1, 2))
            # Matrix that will contain the grid of images
            container = np.zeros((28 * len(values), 28 * len(values)))

            # xx has shape (14, 14): one entry per grid point of np.arange(-3, 4, .5).
            for row in range(xx.shape[0]):
                for col in range(xx.shape[1]):
                    input_holder[0, :] = [xx[row, col], yy[row, col]]
                    artificial_image = sess.run(output, feed_dict={z: input_holder})
                    container[row * 28: (row + 1) * 28, col * 28: (col + 1) * 28] = np.squeeze(artificial_image)

            plt.imshow(container, cmap='gray')
            plt.savefig(os.path.join(results_folder, 'Test/Space_{}'.format(epoch)))
            plt.close()

    # Flush pending summaries to disk and release the event file.
    writer.close()

@diweiqiang
Copy link
Author

diweiqiang commented Apr 30, 2019

Or, if I simply run the code downloaded from this GitHub repository, I still cannot get anything meaningful, which is really odd. Is there anything I forgot to do?

@leonschat
Copy link

Have you solved this problem? I have the same result as you.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants